public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: Don't use frame pointer without stack access
@ 2017-08-06 19:40 H.J. Lu
  2017-08-07  6:21 ` Uros Bizjak
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-06 19:40 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

When there is no stack access, there is no need to use frame pointer
even if -fno-omit-frame-pointer is used.

Tested on i686 and x86-64.  OK for trunk?

H.J.
---
gcc/

	PR target/81736
	* config/i386/i386.c (ix86_finalize_stack_realign_flags): Renamed
	to ...
	(ix86_finalize_stack_frame_flags): This.  Also clear
	frame_pointer_needed if -fno-omit-frame-pointer is used without
	stack access.
	(ix86_expand_prologue): Replace ix86_finalize_stack_realign_flags
	with ix86_finalize_stack_frame_flags.
	(ix86_expand_epilogue): Likewise.
	(ix86_expand_split_stack_prologue): Likewise.

gcc/testsuite/

	PR target/81736
	* gcc.target/i386/pr81736-1.c: New test.
	* gcc.target/i386/pr81736-2.c: Likewise.
	* gcc.target/i386/pr81736-3.c: Likewise.
	* gcc.target/i386/pr81736-4.c: Likewise.
---
 gcc/config/i386/i386.c                    | 26 ++++++++++++++------------
 gcc/testsuite/gcc.target/i386/pr81736-1.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-2.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-3.c | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-4.c | 11 +++++++++++
 5 files changed, 63 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-4.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a5984659eb2..fb16b5e77a3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14116,22 +14116,24 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
-/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
-   to be generated in correct form.  */
+/* Finalize stack_realign_needed and frame_pointer_needed flags, which
+   will guide prologue/epilogue to be generated in correct form.  */
+
 static void
-ix86_finalize_stack_realign_flags (void)
+ix86_finalize_stack_frame_flags (void)
 {
   /* Check if stack realign is really needed after reload, and
      stores result in cfun */
   unsigned int incoming_stack_boundary
     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
-  unsigned int stack_realign
+  bool stack_realign
     = (incoming_stack_boundary
        < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
 	  ? crtl->max_used_stack_slot_alignment
 	  : crtl->stack_alignment_needed));
   bool recompute_frame_layout_p = false;
+  bool omit_frame_pointer = flag_omit_frame_pointer != 0;
 
   if (crtl->stack_realign_finalized)
     {
@@ -14142,13 +14144,13 @@ ix86_finalize_stack_realign_flags (void)
     }
 
   /* If the only reason for frame_pointer_needed is that we conservatively
-     assumed stack realignment might be needed, but in the end nothing that
-     needed the stack alignment had been spilled, clear frame_pointer_needed
-     and say we don't need stack realignment.  */
-  if (stack_realign
+     assumed stack realignment might be needed or -fno-omit-frame-pointer
+     is used, but in the end nothing that needed the stack alignment had
+     been spilled nor stack access, clear frame_pointer_needed and say we
+     don't need stack realignment.  */
+  if (stack_realign == omit_frame_pointer
       && frame_pointer_needed
       && crtl->is_leaf
-      && flag_omit_frame_pointer
       && crtl->sp_is_unchanging
       && !ix86_current_function_calls_tls_descriptor
       && !crtl->accesses_prior_frames
@@ -14339,7 +14341,7 @@ ix86_expand_prologue (void)
   if (ix86_function_naked (current_function_decl))
     return;
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
 
   /* DRAP should not coexist with stack_realign_fp */
   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
@@ -15203,7 +15205,7 @@ ix86_expand_epilogue (int style)
       return;
     }
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = m->frame;
 
   m->fs.sp_realigned = stack_realign_fp;
@@ -15738,7 +15740,7 @@ ix86_expand_split_stack_prologue (void)
 
   gcc_assert (flag_split_stack && reload_completed);
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = cfun->machine->frame;
   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1.c b/gcc/testsuite/gcc.target/i386/pr81736-1.c
new file mode 100644
index 00000000000..92c7bc97a0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int i;
+
+int
+foo (void)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-2.c b/gcc/testsuite/gcc.target/i386/pr81736-2.c
new file mode 100644
index 00000000000..a3720879937
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (int i)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-3.c b/gcc/testsuite/gcc.target/i386/pr81736-3.c
new file mode 100644
index 00000000000..c3bde7dd933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+void
+foo (void)
+{
+  asm ("# " : : : "ebx");
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-4.c b/gcc/testsuite/gcc.target/i386/pr81736-4.c
new file mode 100644
index 00000000000..25f50016a64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+foo (int i1, int i2, int i3, int i4, int i5, int i6, int i7)
+{
+  return i7;
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
-- 
2.13.3

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-06 19:40 [PATCH] i386: Don't use frame pointer without stack access H.J. Lu
@ 2017-08-07  6:21 ` Uros Bizjak
  2017-08-07 13:15   ` Michael Matz
  0 siblings, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-08-07  6:21 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Sun, Aug 6, 2017 at 9:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> When there is no stack access, there is no need to use frame pointer
> even if -fno-omit-frame-pointer is used.
>
> Tested on i686 and x86-64.  OK for trunk?
>
> H.J.
> ---
> gcc/
>
>         PR target/81736
>         * config/i386/i386.c (ix86_finalize_stack_realign_flags): Renamed
>         to ...
>         (ix86_finalize_stack_frame_flags): This.  Also clear
>         frame_pointer_needed if -fno-omit-frame-pointer is used without
>         stack access.
>         (ix86_expand_prologue): Replace ix86_finalize_stack_realign_flags
>         with ix86_finalize_stack_frame_flags.
>         (ix86_expand_epilogue): Likewise.
>         (ix86_expand_split_stack_prologue): Likewise.
>
> gcc/testsuite/
>
>         PR target/81736
>         * gcc.target/i386/pr81736-1.c: New test.
>         * gcc.target/i386/pr81736-2.c: Likewise.
>         * gcc.target/i386/pr81736-3.c: Likewise.
>         * gcc.target/i386/pr81736-4.c: Likewise.

LGTM.

Thanks,
Uros.

>  gcc/config/i386/i386.c                    | 26 ++++++++++++++------------
>  gcc/testsuite/gcc.target/i386/pr81736-1.c | 13 +++++++++++++
>  gcc/testsuite/gcc.target/i386/pr81736-2.c | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr81736-3.c | 11 +++++++++++
>  gcc/testsuite/gcc.target/i386/pr81736-4.c | 11 +++++++++++
>  5 files changed, 63 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-4.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index a5984659eb2..fb16b5e77a3 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -14116,22 +14116,24 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> -/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
> -   to be generated in correct form.  */
> +/* Finalize stack_realign_needed and frame_pointer_needed flags, which
> +   will guide prologue/epilogue to be generated in correct form.  */
> +
>  static void
> -ix86_finalize_stack_realign_flags (void)
> +ix86_finalize_stack_frame_flags (void)
>  {
>    /* Check if stack realign is really needed after reload, and
>       stores result in cfun */
>    unsigned int incoming_stack_boundary
>      = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
>         ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
> -  unsigned int stack_realign
> +  bool stack_realign
>      = (incoming_stack_boundary
>         < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
>           ? crtl->max_used_stack_slot_alignment
>           : crtl->stack_alignment_needed));
>    bool recompute_frame_layout_p = false;
> +  bool omit_frame_pointer = flag_omit_frame_pointer != 0;
>
>    if (crtl->stack_realign_finalized)
>      {
> @@ -14142,13 +14144,13 @@ ix86_finalize_stack_realign_flags (void)
>      }
>
>    /* If the only reason for frame_pointer_needed is that we conservatively
> -     assumed stack realignment might be needed, but in the end nothing that
> -     needed the stack alignment had been spilled, clear frame_pointer_needed
> -     and say we don't need stack realignment.  */
> -  if (stack_realign
> +     assumed stack realignment might be needed or -fno-omit-frame-pointer
> +     is used, but in the end nothing that needed the stack alignment had
> +     been spilled nor stack access, clear frame_pointer_needed and say we
> +     don't need stack realignment.  */
> +  if (stack_realign == omit_frame_pointer
>        && frame_pointer_needed
>        && crtl->is_leaf
> -      && flag_omit_frame_pointer
>        && crtl->sp_is_unchanging
>        && !ix86_current_function_calls_tls_descriptor
>        && !crtl->accesses_prior_frames
> @@ -14339,7 +14341,7 @@ ix86_expand_prologue (void)
>    if (ix86_function_naked (current_function_decl))
>      return;
>
> -  ix86_finalize_stack_realign_flags ();
> +  ix86_finalize_stack_frame_flags ();
>
>    /* DRAP should not coexist with stack_realign_fp */
>    gcc_assert (!(crtl->drap_reg && stack_realign_fp));
> @@ -15203,7 +15205,7 @@ ix86_expand_epilogue (int style)
>        return;
>      }
>
> -  ix86_finalize_stack_realign_flags ();
> +  ix86_finalize_stack_frame_flags ();
>    frame = m->frame;
>
>    m->fs.sp_realigned = stack_realign_fp;
> @@ -15738,7 +15740,7 @@ ix86_expand_split_stack_prologue (void)
>
>    gcc_assert (flag_split_stack && reload_completed);
>
> -  ix86_finalize_stack_realign_flags ();
> +  ix86_finalize_stack_frame_flags ();
>    frame = cfun->machine->frame;
>    allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1.c b/gcc/testsuite/gcc.target/i386/pr81736-1.c
> new file mode 100644
> index 00000000000..92c7bc97a0d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr81736-1.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-omit-frame-pointer" } */
> +
> +extern int i;
> +
> +int
> +foo (void)
> +{
> +  return i;
> +}
> +
> +/* No need to use a frame pointer.  */
> +/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr81736-2.c b/gcc/testsuite/gcc.target/i386/pr81736-2.c
> new file mode 100644
> index 00000000000..a3720879937
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr81736-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-omit-frame-pointer" } */
> +
> +int
> +#ifndef __x86_64__
> +__attribute__((regparm(3)))
> +#endif
> +foo (int i)
> +{
> +  return i;
> +}
> +
> +/* No need to use a frame pointer.  */
> +/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr81736-3.c b/gcc/testsuite/gcc.target/i386/pr81736-3.c
> new file mode 100644
> index 00000000000..c3bde7dd933
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr81736-3.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-omit-frame-pointer" } */
> +
> +void
> +foo (void)
> +{
> +  asm ("# " : : : "ebx");
> +}
> +
> +/* Need to use a frame pointer.  */
> +/* { dg-final { scan-assembler "%\[re\]bp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr81736-4.c b/gcc/testsuite/gcc.target/i386/pr81736-4.c
> new file mode 100644
> index 00000000000..25f50016a64
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr81736-4.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-omit-frame-pointer" } */
> +
> +int
> +foo (int i1, int i2, int i3, int i4, int i5, int i6, int i7)
> +{
> +  return i7;
> +}
> +
> +/* Need to use a frame pointer.  */
> +/* { dg-final { scan-assembler "%\[re\]bp" } } */
> --
> 2.13.3
>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07  6:21 ` Uros Bizjak
@ 2017-08-07 13:15   ` Michael Matz
  2017-08-07 13:21     ` Uros Bizjak
  0 siblings, 1 reply; 41+ messages in thread
From: Michael Matz @ 2017-08-07 13:15 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: H.J. Lu, gcc-patches

Hi,

On Mon, 7 Aug 2017, Uros Bizjak wrote:

> On Sun, Aug 6, 2017 at 9:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> > When there is no stack access, there is no need to use frame pointer
> > even if -fno-omit-frame-pointer is used.
> >
> > Tested on i686 and x86-64.  OK for trunk?
> 
> LGTM.

This will break unwinders relying on frame pointers to exist on all 
functions, for which projects conciously forced a frame pointer with this 
option.  I don't think we can simply override user specified explicit 
wishes in this way, presumably they had a reason to use it.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:15   ` Michael Matz
@ 2017-08-07 13:21     ` Uros Bizjak
  2017-08-07 13:25       ` H.J. Lu
  0 siblings, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-08-07 13:21 UTC (permalink / raw)
  To: Michael Matz; +Cc: H.J. Lu, gcc-patches

On Mon, Aug 7, 2017 at 3:15 PM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> On Mon, 7 Aug 2017, Uros Bizjak wrote:
>
>> On Sun, Aug 6, 2017 at 9:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> > When there is no stack access, there is no need to use frame pointer
>> > even if -fno-omit-frame-pointer is used.
>> >
>> > Tested on i686 and x86-64.  OK for trunk?
>>
>> LGTM.
>
> This will break unwinders relying on frame pointers to exist on all
> functions, for which projects conciously forced a frame pointer with this
> option.  I don't think we can simply override user specified explicit
> wishes in this way, presumably they had a reason to use it.

Hm... yes, you are right.

HJ, please revert the patch.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:21     ` Uros Bizjak
@ 2017-08-07 13:25       ` H.J. Lu
  2017-08-07 13:32         ` Michael Matz
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-07 13:25 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Michael Matz, gcc-patches

On Mon, Aug 7, 2017 at 6:21 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Mon, Aug 7, 2017 at 3:15 PM, Michael Matz <matz@suse.de> wrote:
>> Hi,
>>
>> On Mon, 7 Aug 2017, Uros Bizjak wrote:
>>
>>> On Sun, Aug 6, 2017 at 9:40 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> > When there is no stack access, there is no need to use frame pointer
>>> > even if -fno-omit-frame-pointer is used.
>>> >
>>> > Tested on i686 and x86-64.  OK for trunk?
>>>
>>> LGTM.
>>
>> This will break unwinders relying on frame pointers to exist on all
>> functions, for which projects conciously forced a frame pointer with this
>> option.  I don't think we can simply override user specified explicit
>> wishes in this way, presumably they had a reason to use it.
>
> Hm... yes, you are right.
>
> HJ, please revert the patch.
>

Is there a testcae?  I'd like to add it.

FWIW,

[hjl@gnu-tools-1 pr81736]$ cat x.i
extern int i;

int
foo (void)
{
  return i;
}
[hjl@gnu-tools-1 pr81736]$ clang -S -O2 -fno-omit-frame-pointer x.i
[hjl@gnu-tools-1 pr81736]$ cat x.s
.text
.file "x.i"
.globl foo
.p2align 4, 0x90
.type foo,@function
foo:                                    # @foo
.cfi_startproc
# BB#0:
movl i(%rip), %eax
retq
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
.cfi_endproc


.ident "clang version 4.0.0 (tags/RELEASE_400/final)"
.section ".note.GNU-stack","",@progbits
[hjl@gnu-tools-1 pr81736]$

Does it mean clang is broken?


-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:25       ` H.J. Lu
@ 2017-08-07 13:32         ` Michael Matz
  2017-08-07 13:38           ` H.J. Lu
  2017-08-07 13:38           ` Andreas Schwab
  0 siblings, 2 replies; 41+ messages in thread
From: Michael Matz @ 2017-08-07 13:32 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

Hi,

On Mon, 7 Aug 2017, H.J. Lu wrote:

> >> This will break unwinders relying on frame pointers to exist on all
> >> functions, for which projects conciously forced a frame pointer with this
> >> option.  I don't think we can simply override user specified explicit
> >> wishes in this way, presumably they had a reason to use it.
> >
> > Hm... yes, you are right.
> >
> > HJ, please revert the patch.
> >
> 
> Is there a testcae?  I'd like to add it.

Trivial change of your first example, see below.

> [hjl@gnu-tools-1 pr81736]$ clang -S -O2 -fno-omit-frame-pointer x.i
> [hjl@gnu-tools-1 pr81736]$ cat x.s
[... no frame ...]
> [hjl@gnu-tools-1 pr81736]$
> 
> Does it mean clang is broken?

In my book, yes.


Ciao,
Michael.

Index: gcc/testsuite/gcc.target/i386/force-frame.c
===================================================================
--- gcc/testsuite/gcc.target/i386/force-frame.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/force-frame.c	(working copy)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (int i)
+{
+  return i;
+}
+
+/* The user wants a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:32         ` Michael Matz
  2017-08-07 13:38           ` H.J. Lu
@ 2017-08-07 13:38           ` Andreas Schwab
  1 sibling, 0 replies; 41+ messages in thread
From: Andreas Schwab @ 2017-08-07 13:38 UTC (permalink / raw)
  To: Michael Matz; +Cc: H.J. Lu, Uros Bizjak, gcc-patches

On Aug 07 2017, Michael Matz <matz@suse.de> wrote:

> +/* { dg-final { scan-assembler "%\[re\]bp" } } */

Please use {} for regexps.

Andreas.

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:32         ` Michael Matz
@ 2017-08-07 13:38           ` H.J. Lu
  2017-08-07 13:49             ` Michael Matz
  2017-08-07 13:38           ` Andreas Schwab
  1 sibling, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-07 13:38 UTC (permalink / raw)
  To: Michael Matz; +Cc: Uros Bizjak, gcc-patches

On Mon, Aug 7, 2017 at 6:32 AM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> On Mon, 7 Aug 2017, H.J. Lu wrote:
>
>> >> This will break unwinders relying on frame pointers to exist on all
>> >> functions, for which projects conciously forced a frame pointer with this
>> >> option.  I don't think we can simply override user specified explicit
>> >> wishes in this way, presumably they had a reason to use it.
>> >
>> > Hm... yes, you are right.
>> >
>> > HJ, please revert the patch.
>> >
>>
>> Is there a testcae?  I'd like to add it.
>
> Trivial change of your first example, see below.
>
>> [hjl@gnu-tools-1 pr81736]$ clang -S -O2 -fno-omit-frame-pointer x.i
>> [hjl@gnu-tools-1 pr81736]$ cat x.s
> [... no frame ...]
>> [hjl@gnu-tools-1 pr81736]$
>>
>> Does it mean clang is broken?
>
> In my book, yes.

Does GCC do this for all targets or just x86?

>
> Ciao,
> Michael.
>
> Index: gcc/testsuite/gcc.target/i386/force-frame.c
> ===================================================================
> --- gcc/testsuite/gcc.target/i386/force-frame.c (revision 0)
> +++ gcc/testsuite/gcc.target/i386/force-frame.c (working copy)
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-omit-frame-pointer" } */
> +
> +int
> +#ifndef __x86_64__
> +__attribute__((regparm(3)))
> +#endif
> +foo (int i)
> +{
> +  return i;
> +}
> +
> +/* The user wants a frame pointer.  */
> +/* { dg-final { scan-assembler "%\[re\]bp" } } */

I am looking for a run-time test which breaks unwinder.

-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:38           ` H.J. Lu
@ 2017-08-07 13:49             ` Michael Matz
  2017-08-07 14:06               ` Alexander Monakov
  2017-08-07 18:40               ` Uros Bizjak
  0 siblings, 2 replies; 41+ messages in thread
From: Michael Matz @ 2017-08-07 13:49 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, gcc-patches

Hi,

On Mon, 7 Aug 2017, H.J. Lu wrote:

> >> [hjl@gnu-tools-1 pr81736]$
> >>
> >> Does it mean clang is broken?
> >
> > In my book, yes.
> 
> Does GCC do this for all targets or just x86?

No idea.  If so I'd say those other targets are broken as well (as long as 
the concept of frame pointer makes sense on them, their ABI defines one 
but leaves it optional and something like an unwinder could make use of 
it).

> I am looking for a run-time test which breaks unwinder.

I don't have one handy.  Idea: make two threads, one endlessly looping in 
the "frame-less" function, the other causing a signal to the first thread, 
and the signal handler checking that unwinding up to caller of 
frame_less() is possible via %[er]bp chaining.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:49             ` Michael Matz
@ 2017-08-07 14:06               ` Alexander Monakov
  2017-08-07 15:39                 ` H.J. Lu
  2017-08-07 18:40               ` Uros Bizjak
  1 sibling, 1 reply; 41+ messages in thread
From: Alexander Monakov @ 2017-08-07 14:06 UTC (permalink / raw)
  To: Michael Matz; +Cc: H.J. Lu, Uros Bizjak, gcc-patches

On Mon, 7 Aug 2017, Michael Matz wrote:
> > I am looking for a run-time test which breaks unwinder.
> 
> I don't have one handy.  Idea: make two threads, one endlessly looping in 
> the "frame-less" function, the other causing a signal to the first thread, 
> and the signal handler checking that unwinding up to caller of 
> frame_less() is possible via %[er]bp chaining.

You'd probably have to arrange frame_less modify %rbp, otherwise unwinding
might "appear to work" by virtue of %rbp being valid for the outer frame.

I think one specific, real-life use case that may be potentially hurt by
this change is using linux-perf with backtrace recording, for programs with
hot functions that don't otherwise access the stack (which is plausible for
leaf functions with hot loops).

Alexander

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 14:06               ` Alexander Monakov
@ 2017-08-07 15:39                 ` H.J. Lu
  2017-08-07 15:43                   ` Jakub Jelinek
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-07 15:39 UTC (permalink / raw)
  To: Alexander Monakov, Arjan van de Ven
  Cc: Michael Matz, Uros Bizjak, gcc-patches

On Mon, Aug 7, 2017 at 7:06 AM, Alexander Monakov <amonakov@ispras.ru> wrote:
> On Mon, 7 Aug 2017, Michael Matz wrote:
>> > I am looking for a run-time test which breaks unwinder.
>>
>> I don't have one handy.  Idea: make two threads, one endlessly looping in
>> the "frame-less" function, the other causing a signal to the first thread,
>> and the signal handler checking that unwinding up to caller of
>> frame_less() is possible via %[er]bp chaining.
>
> You'd probably have to arrange frame_less modify %rbp, otherwise unwinding
> might "appear to work" by virtue of %rbp being valid for the outer frame.
>
> I think one specific, real-life use case that may be potentially hurt by
> this change is using linux-perf with backtrace recording, for programs with
> hot functions that don't otherwise access the stack (which is plausible for
> leaf functions with hot loops).
>
> Alexander

This code is very silly with very little benefit:

[hjl@gnu-6 tmp]$ cat x.c
extern void bar (void);

void
foo (void)
{
  bar ();
}
[hjl@gnu-6 tmp]$ gcc -fno-omit-frame-pointer x.c -S -O2
[hjl@gnu-6 tmp]$ cat x.s
.file "x.c"
.text
.p2align 4,,15
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
popq %rbp
.cfi_def_cfa 7, 8
jmp bar
.cfi_endproc
.LFE0:
.size foo, .-foo
.ident "GCC: (GNU) 7.1.1 20170709 (Red Hat 7.1.1-4)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-6 tmp]$

When another compiler does this optimization, applications won't
expect

pushq %rbp
movq %rsp, %rbp
popq %rbp
jmp bar

When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
this optimization removes more than 730

pushq %rbp
movq %rsp, %rbp
popq %rbp

Can we apply this optimization when function body has less than 6
instructions, similar to ix86_pad_short_function?

-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 15:39                 ` H.J. Lu
@ 2017-08-07 15:43                   ` Jakub Jelinek
  2017-08-07 16:06                     ` Arjan van de Ven
  0 siblings, 1 reply; 41+ messages in thread
From: Jakub Jelinek @ 2017-08-07 15:43 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Alexander Monakov, Arjan van de Ven, Michael Matz, Uros Bizjak,
	gcc-patches

On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
> this optimization removes more than 730
> 
> pushq %rbp
> movq %rsp, %rbp
> popq %rbp

If you don't want the frame pointer, why are you compiling with
-fno-omit-frame-pointer?  Are you going to add
-fforce-no-omit-frame-pointer or something similar so that people can
actually get what they are asking for?  This doesn't really make sense.
It is perfectly fine to omit frame pointer by default, when it isn't
required for something, but if the user asks for it, we shouldn't ignore his
request.

	Jakub

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 15:43                   ` Jakub Jelinek
@ 2017-08-07 16:06                     ` Arjan van de Ven
  2017-08-07 16:16                       ` Michael Matz
  2017-08-07 20:05                       ` Richard Sandiford
  0 siblings, 2 replies; 41+ messages in thread
From: Arjan van de Ven @ 2017-08-07 16:06 UTC (permalink / raw)
  To: Jakub Jelinek, H.J. Lu
  Cc: Alexander Monakov, Michael Matz, Uros Bizjak, gcc-patches

On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>> this optimization removes more than 730
>>
>> pushq %rbp
>> movq %rsp, %rbp
>> popq %rbp
>
> If you don't want the frame pointer, why are you compiling with
> -fno-omit-frame-pointer?  Are you going to add
> -fforce-no-omit-frame-pointer or something similar so that people can
> actually get what they are asking for?  This doesn't really make sense.
> It is perfectly fine to omit frame pointer by default, when it isn't
> required for something, but if the user asks for it, we shouldn't ignore his
> request.
>


wanting a framepointer is very nice and desired...
... but if the optimizer/ins scheduler moves instructions outside of the frame'd portion,
(it does it for cases like below as well), the value is already negative for these
functions that don't have stack use.

<MPIDU_Sched_are_pending@@Base>:
mov    all_schedules@@Base-0x38460,%rax
push   %rbp
mov    %rsp,%rbp
pop    %rbp
cmpq   $0x0,(%rax)
setne  %al
movzbl %al,%eax
retq

(gcc 7.1 compiling mpich with LTO)

specifically the push/mov/pop back to back makes no sense at all to me.
if there was meat before the pop, sure.
but when there isn't......



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 16:06                     ` Arjan van de Ven
@ 2017-08-07 16:16                       ` Michael Matz
  2017-08-07 16:19                         ` Arjan van de Ven
  2017-08-07 20:05                       ` Richard Sandiford
  1 sibling, 1 reply; 41+ messages in thread
From: Michael Matz @ 2017-08-07 16:16 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Jakub Jelinek, H.J. Lu, Alexander Monakov, Uros Bizjak, gcc-patches

Hi,

On Mon, 7 Aug 2017, Arjan van de Ven wrote:

> wanting a framepointer is very nice and desired...
> ... but if the optimizer/ins scheduler moves instructions outside of the
> frame'd portion,
> (it does it for cases like below as well), the value is already negative for
> these
> functions that don't have stack use.
> 
> <MPIDU_Sched_are_pending@@Base>:
> mov    all_schedules@@Base-0x38460,%rax
> push   %rbp
> mov    %rsp,%rbp
> pop    %rbp
> cmpq   $0x0,(%rax)
> setne  %al
> movzbl %al,%eax
> retq

Then don't compile with -fno-omit-frame-pointer.  You explicitely 
requested one, so why are you surprised to see one?

> specifically the push/mov/pop back to back makes no sense at all to me.
> if there was meat before the pop, sure.
> but when there isn't......

That can be fixed by making the pop a real scheduling barrier.  If we 
should do that I don't know (I'd lean towards not having to do that for 
loopless code).


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 16:16                       ` Michael Matz
@ 2017-08-07 16:19                         ` Arjan van de Ven
  2017-08-07 16:21                           ` H.J. Lu
  2017-08-07 16:28                           ` Michael Matz
  0 siblings, 2 replies; 41+ messages in thread
From: Arjan van de Ven @ 2017-08-07 16:19 UTC (permalink / raw)
  To: Michael Matz
  Cc: Jakub Jelinek, H.J. Lu, Alexander Monakov, Uros Bizjak, gcc-patches

On 8/7/2017 9:16 AM, Michael Matz wrote:
> Hi,
>
> On Mon, 7 Aug 2017, Arjan van de Ven wrote:
>
>> wanting a framepointer is very nice and desired...
>> ... but if the optimizer/ins scheduler moves instructions outside of the
>> frame'd portion,
>> (it does it for cases like below as well), the value is already negative for
>> these
>> functions that don't have stack use.
>>
>> <MPIDU_Sched_are_pending@@Base>:
>> mov    all_schedules@@Base-0x38460,%rax
>> push   %rbp
>> mov    %rsp,%rbp
>> pop    %rbp
>> cmpq   $0x0,(%rax)
>> setne  %al
>> movzbl %al,%eax
>> retq
>
> Then don't compile with -fno-omit-frame-pointer.  You explicitely
> requested one, so why are you surprised to see one?

I'm not surprised to see one.
I'm surprised to see a useless one.

The "perf" benefit is real, and that's why I asked for one... but the reorder made it an expensive
3 instruction nop for all intents and purposes.
If the pop was just before the ret, sure. It's not.

Maybe a different angle would be for a peephole phase to just eliminate the useless (even for those
who do want a frame pointer) push/mov/pop


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 16:19                         ` Arjan van de Ven
@ 2017-08-07 16:21                           ` H.J. Lu
  2017-08-07 16:28                           ` Michael Matz
  1 sibling, 0 replies; 41+ messages in thread
From: H.J. Lu @ 2017-08-07 16:21 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Michael Matz, Jakub Jelinek, Alexander Monakov, Uros Bizjak, gcc-patches

On Mon, Aug 7, 2017 at 9:19 AM, Arjan van de Ven <arjan@linux.intel.com> wrote:
> On 8/7/2017 9:16 AM, Michael Matz wrote:
>>
>> Hi,
>>
>> On Mon, 7 Aug 2017, Arjan van de Ven wrote:
>>
>>> wanting a framepointer is very nice and desired...
>>> ... but if the optimizer/ins scheduler moves instructions outside of the
>>> frame'd portion,
>>> (it does it for cases like below as well), the value is already negative
>>> for
>>> these
>>> functions that don't have stack use.
>>>
>>> <MPIDU_Sched_are_pending@@Base>:
>>> mov    all_schedules@@Base-0x38460,%rax
>>> push   %rbp
>>> mov    %rsp,%rbp
>>> pop    %rbp
>>> cmpq   $0x0,(%rax)
>>> setne  %al
>>> movzbl %al,%eax
>>> retq
>>
>>
>> Then don't compile with -fno-omit-frame-pointer.  You explicitely
>> requested one, so why are you surprised to see one?
>
>
> I'm not surprised to see one.
> I'm surprised to see a useless one.
>
> The "perf" benefit is real, and that's why I asked for one... but the
> reorder made it an expensive
> 3 instruction nop for all intents and purposes.
> If the pop was just before the ret, sure. It's not.
>
> Maybe a different angle would be for a peephole phase to just eliminate the
> useless (even for those
> who do want a frame pointer) push/mov/pop
>

I have seen GCC generate

push %rbp
pop %rbp


-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 16:19                         ` Arjan van de Ven
  2017-08-07 16:21                           ` H.J. Lu
@ 2017-08-07 16:28                           ` Michael Matz
  1 sibling, 0 replies; 41+ messages in thread
From: Michael Matz @ 2017-08-07 16:28 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Jakub Jelinek, H.J. Lu, Alexander Monakov, Uros Bizjak, gcc-patches

Hi,

On Mon, 7 Aug 2017, Arjan van de Ven wrote:

> I'm not surprised to see one.
> I'm surprised to see a useless one.
> 
> The "perf" benefit is real, and that's why I asked for one... but the reorder
> made it an expensive 3 instruction nop for all intents and purposes.
> If the pop was just before the ret, sure. It's not.

Okay, that seems a reasonable request.  But IMHO independend from the 
issue of simply ignoring -fno-omit-frame-pointer to which I object.

> Maybe a different angle would be for a peephole phase to just eliminate 
> the useless (even for those who do want a frame pointer) push/mov/pop

For instance.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 13:49             ` Michael Matz
  2017-08-07 14:06               ` Alexander Monakov
@ 2017-08-07 18:40               ` Uros Bizjak
  1 sibling, 0 replies; 41+ messages in thread
From: Uros Bizjak @ 2017-08-07 18:40 UTC (permalink / raw)
  To: Michael Matz; +Cc: H.J. Lu, gcc-patches

On Mon, Aug 7, 2017 at 3:48 PM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> On Mon, 7 Aug 2017, H.J. Lu wrote:
>
>> >> [hjl@gnu-tools-1 pr81736]$
>> >>
>> >> Does it mean clang is broken?
>> >
>> > In my book, yes.
>>
>> Does GCC do this for all targets or just x86?
>
> No idea.  If so I'd say those other targets are broken as well (as long as
> the concept of frame pointer makes sense on them, their ABI defines one
> but leaves it optional and something like an unwinder could make use of
> it).
>
>> I am looking for a run-time test which breaks unwinder.
>
> I don't have one handy.  Idea: make two threads, one endlessly looping in
> the "frame-less" function, the other causing a signal to the first thread,
> and the signal handler checking that unwinding up to caller of
> frame_less() is possible via %[er]bp chaining.

OTOH, even if the function doesn't make its own frame, the %[er]bp is
still untouched and points to the calling function frame. So, I think
that even in that case unwinding should be unharmed.

Frame setupsthat HJ presented are really unneeded, so there is a clear benefit.

How about we keep the patch for a while and see if something indeed
breaks? Linux live patching looks quite fragile in this area.

Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 16:06                     ` Arjan van de Ven
  2017-08-07 16:16                       ` Michael Matz
@ 2017-08-07 20:05                       ` Richard Sandiford
  2017-08-08 16:38                         ` H.J. Lu
  1 sibling, 1 reply; 41+ messages in thread
From: Richard Sandiford @ 2017-08-07 20:05 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Jakub Jelinek, H.J. Lu, Alexander Monakov, Michael Matz,
	Uros Bizjak, gcc-patches

Arjan van de Ven <arjan@linux.intel.com> writes:
> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>>> this optimization removes more than 730
>>>
>>> pushq %rbp
>>> movq %rsp, %rbp
>>> popq %rbp
>>
>> If you don't want the frame pointer, why are you compiling with
>> -fno-omit-frame-pointer?  Are you going to add
>> -fforce-no-omit-frame-pointer or something similar so that people can
>> actually get what they are asking for?  This doesn't really make sense.
>> It is perfectly fine to omit frame pointer by default, when it isn't
>> required for something, but if the user asks for it, we shouldn't ignore his
>> request.
>>
>
>
> wanting a framepointer is very nice and desired...  ... but if the
> optimizer/ins scheduler moves instructions outside of the frame'd
> portion, (it does it for cases like below as well), the value is
> already negative for these functions that don't have stack use.
>
> <MPIDU_Sched_are_pending@@Base>:
> mov    all_schedules@@Base-0x38460,%rax
> push   %rbp
> mov    %rsp,%rbp
> pop    %rbp
> cmpq   $0x0,(%rax)
> setne  %al
> movzbl %al,%eax
> retq

Yeah, and it could be even weirder for big single-block functions.
I think GCC has been doing this kind of scheduling of prologue and
epilogue instructions for a while, so there hasn*t really been a
guarantee which parts of the function will have a new FP and which
will still have the old one.

Also, with an arbitrarily-picked host compiler (GCC 6.3.1), shrink-wrapping
kicks in when the following is compiled with -O3 -fno-omit-frame-pointer:

    void f (int *);
    void
    g (int *x)
    {
      for (int i = 0; i < 1000; ++i)
        x[i] += 1;
      if (x[0])
        {
          int temp;
          f (&temp);
        }
    }

so only the block with the call to f sets up FP.  The relatively
long-running loop runs with the caller's FP.

I hope we can go for a target-independent position that what HJ*s
patch does is OK...

Thanks,
Richard

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-07 20:05                       ` Richard Sandiford
@ 2017-08-08 16:38                         ` H.J. Lu
  2017-08-08 17:01                           ` Richard Biener
  2017-08-08 17:05                           ` Uros Bizjak
  0 siblings, 2 replies; 41+ messages in thread
From: H.J. Lu @ 2017-08-08 16:38 UTC (permalink / raw)
  To: Arjan van de Ven, Jakub Jelinek, H.J. Lu, Alexander Monakov,
	Michael Matz, Uros Bizjak, gcc-patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 2539 bytes --]

On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Arjan van de Ven <arjan@linux.intel.com> writes:
>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>>>> this optimization removes more than 730
>>>>
>>>> pushq %rbp
>>>> movq %rsp, %rbp
>>>> popq %rbp
>>>
>>> If you don't want the frame pointer, why are you compiling with
>>> -fno-omit-frame-pointer?  Are you going to add
>>> -fforce-no-omit-frame-pointer or something similar so that people can
>>> actually get what they are asking for?  This doesn't really make sense.
>>> It is perfectly fine to omit frame pointer by default, when it isn't
>>> required for something, but if the user asks for it, we shouldn't ignore his
>>> request.
>>>
>>
>>
>> wanting a framepointer is very nice and desired...  ... but if the
>> optimizer/ins scheduler moves instructions outside of the frame'd
>> portion, (it does it for cases like below as well), the value is
>> already negative for these functions that don't have stack use.
>>
>> <MPIDU_Sched_are_pending@@Base>:
>> mov    all_schedules@@Base-0x38460,%rax
>> push   %rbp
>> mov    %rsp,%rbp
>> pop    %rbp
>> cmpq   $0x0,(%rax)
>> setne  %al
>> movzbl %al,%eax
>> retq
>
> Yeah, and it could be even weirder for big single-block functions.
> I think GCC has been doing this kind of scheduling of prologue and
> epilogue instructions for a while, so there hasn*t really been a
> guarantee which parts of the function will have a new FP and which
> will still have the old one.
>
> Also, with an arbitrarily-picked host compiler (GCC 6.3.1), shrink-wrapping
> kicks in when the following is compiled with -O3 -fno-omit-frame-pointer:
>
>     void f (int *);
>     void
>     g (int *x)
>     {
>       for (int i = 0; i < 1000; ++i)
>         x[i] += 1;
>       if (x[0])
>         {
>           int temp;
>           f (&temp);
>         }
>     }
>
> so only the block with the call to f sets up FP.  The relatively
> long-running loop runs with the caller's FP.
>
> I hope we can go for a target-independent position that what HJ*s
> patch does is OK...
>

In light of this,  I am resubmitting my patch.  I added 3 more testcases
and also handle:

typedef int v8si __attribute__ ((vector_size (32)));

void
foo (v8si *out_start, v8si *out_end, v8si *regions)
{
    v8si base = regions[3];
    *out_start = base;
    *out_end = base;
}

OK for trunk?

Thanks.

-- 
H.J.

[-- Attachment #2: 0001-i386-Don-t-use-frame-pointer-without-stack-access.patch --]
[-- Type: text/x-patch, Size: 8169 bytes --]

From d18008a8052973ab8582a1662f42669a9d318d0d Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 6 Aug 2017 06:24:36 -0700
Subject: [PATCH] i386: Don't use frame pointer without stack access

When there is no stack access, there is no need to use frame pointer
even if -fno-omit-frame-pointer is used.

gcc/

	PR target/81736
	* config/i386/i386.c (ix86_finalize_stack_realign_flags): Renamed
	to ...
	(ix86_finalize_stack_frame_flags): This.  Also clear
	frame_pointer_needed if -fno-omit-frame-pointer is used without
	stack access.
	(ix86_expand_prologue): Replace ix86_finalize_stack_realign_flags
	with ix86_finalize_stack_frame_flags.
	(ix86_expand_epilogue): Likewise.
	(ix86_expand_split_stack_prologue): Likewise.

gcc/testsuite/

	PR target/81736
	* gcc.target/i386/pr81736-1.c: New test.
	* gcc.target/i386/pr81736-2.c: Likewise.
	* gcc.target/i386/pr81736-3.c: Likewise.
	* gcc.target/i386/pr81736-4.c: Likewise.
	* gcc.target/i386/pr81736-5.c: Likewise.
	* gcc.target/i386/pr81736-6.c: Likewise.
	* gcc.target/i386/pr81736-7.c: Likewise.
---
 gcc/config/i386/i386.c                    | 23 ++++++++++++-----------
 gcc/testsuite/gcc.target/i386/pr81736-1.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-2.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-3.c | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-4.c | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-5.c | 20 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-6.c | 16 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-7.c | 13 +++++++++++++
 8 files changed, 110 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-7.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dfef996e36c..ed51595298d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14116,10 +14116,11 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
-/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
-   to be generated in correct form.  */
+/* Finalize stack_realign_needed and frame_pointer_needed flags, which
+   will guide prologue/epilogue to be generated in correct form.  */
+
 static void
-ix86_finalize_stack_realign_flags (void)
+ix86_finalize_stack_frame_flags (void)
 {
   /* Check if stack realign is really needed after reload, and
      stores result in cfun */
@@ -14142,13 +14143,13 @@ ix86_finalize_stack_realign_flags (void)
     }
 
   /* If the only reason for frame_pointer_needed is that we conservatively
-     assumed stack realignment might be needed, but in the end nothing that
-     needed the stack alignment had been spilled, clear frame_pointer_needed
-     and say we don't need stack realignment.  */
-  if (stack_realign
+     assumed stack realignment might be needed or -fno-omit-frame-pointer
+     is used, but in the end nothing that needed the stack alignment had
+     been spilled nor stack access, clear frame_pointer_needed and say we
+     don't need stack realignment.  */
+  if ((stack_realign || !flag_omit_frame_pointer)
       && frame_pointer_needed
       && crtl->is_leaf
-      && flag_omit_frame_pointer
       && crtl->sp_is_unchanging
       && !ix86_current_function_calls_tls_descriptor
       && !crtl->accesses_prior_frames
@@ -14339,7 +14340,7 @@ ix86_expand_prologue (void)
   if (ix86_function_naked (current_function_decl))
     return;
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
 
   /* DRAP should not coexist with stack_realign_fp */
   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
@@ -15203,7 +15204,7 @@ ix86_expand_epilogue (int style)
       return;
     }
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = m->frame;
 
   m->fs.sp_realigned = stack_realign_fp;
@@ -15738,7 +15739,7 @@ ix86_expand_split_stack_prologue (void)
 
   gcc_assert (flag_split_stack && reload_completed);
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = cfun->machine->frame;
   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1.c b/gcc/testsuite/gcc.target/i386/pr81736-1.c
new file mode 100644
index 00000000000..92c7bc97a0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int i;
+
+int
+foo (void)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-2.c b/gcc/testsuite/gcc.target/i386/pr81736-2.c
new file mode 100644
index 00000000000..a3720879937
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (int i)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-3.c b/gcc/testsuite/gcc.target/i386/pr81736-3.c
new file mode 100644
index 00000000000..c3bde7dd933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+void
+foo (void)
+{
+  asm ("# " : : : "ebx");
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-4.c b/gcc/testsuite/gcc.target/i386/pr81736-4.c
new file mode 100644
index 00000000000..25f50016a64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+foo (int i1, int i2, int i3, int i4, int i5, int i6, int i7)
+{
+  return i7;
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-5.c b/gcc/testsuite/gcc.target/i386/pr81736-5.c
new file mode 100644
index 00000000000..e1602cf25ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -mavx" } */
+
+typedef int v8si __attribute__ ((vector_size (32)));
+
+void
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (v8si *out_start, v8si *out_end, v8si *regions)
+{
+  v8si base = regions[3];
+  *out_start = base;
+  *out_end = base;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
+/* Verify no dynamic realignment is performed.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-6.c b/gcc/testsuite/gcc.target/i386/pr81736-6.c
new file mode 100644
index 00000000000..6198574c8cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+struct foo
+{
+  int head;
+} a;
+
+int
+bar (void)
+{
+  return a.head != 0;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-7.c b/gcc/testsuite/gcc.target/i386/pr81736-7.c
new file mode 100644
index 00000000000..f947886e642
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int foo (void);
+
+int
+bar (void)
+{
+  return foo ();
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
-- 
2.13.4


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-08 16:38                         ` H.J. Lu
@ 2017-08-08 17:01                           ` Richard Biener
  2017-08-08 17:34                             ` Richard Sandiford
  2017-08-08 17:05                           ` Uros Bizjak
  1 sibling, 1 reply; 41+ messages in thread
From: Richard Biener @ 2017-08-08 17:01 UTC (permalink / raw)
  To: gcc-patches, H.J. Lu, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak, gcc-patches,
	richard.sandiford

On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu" <hjl.tools@gmail.com> wrote:
>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
><richard.sandiford@linaro.org> wrote:
>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>>>>> this optimization removes more than 730
>>>>>
>>>>> pushq %rbp
>>>>> movq %rsp, %rbp
>>>>> popq %rbp
>>>>
>>>> If you don't want the frame pointer, why are you compiling with
>>>> -fno-omit-frame-pointer?  Are you going to add
>>>> -fforce-no-omit-frame-pointer or something similar so that people
>can
>>>> actually get what they are asking for?  This doesn't really make
>sense.
>>>> It is perfectly fine to omit frame pointer by default, when it
>isn't
>>>> required for something, but if the user asks for it, we shouldn't
>ignore his
>>>> request.
>>>>
>>>
>>>
>>> wanting a framepointer is very nice and desired...  ... but if the
>>> optimizer/ins scheduler moves instructions outside of the frame'd
>>> portion, (it does it for cases like below as well), the value is
>>> already negative for these functions that don't have stack use.
>>>
>>> <MPIDU_Sched_are_pending@@Base>:
>>> mov    all_schedules@@Base-0x38460,%rax
>>> push   %rbp
>>> mov    %rsp,%rbp
>>> pop    %rbp
>>> cmpq   $0x0,(%rax)
>>> setne  %al
>>> movzbl %al,%eax
>>> retq
>>
>> Yeah, and it could be even weirder for big single-block functions.
>> I think GCC has been doing this kind of scheduling of prologue and
>> epilogue instructions for a while, so there hasn*t really been a
>> guarantee which parts of the function will have a new FP and which
>> will still have the old one.
>>
>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>shrink-wrapping
>> kicks in when the following is compiled with -O3
>-fno-omit-frame-pointer:
>>
>>     void f (int *);
>>     void
>>     g (int *x)
>>     {
>>       for (int i = 0; i < 1000; ++i)
>>         x[i] += 1;
>>       if (x[0])
>>         {
>>           int temp;
>>           f (&temp);
>>         }
>>     }
>>
>> so only the block with the call to f sets up FP.  The relatively
>> long-running loop runs with the caller's FP.
>>
>> I hope we can go for a target-independent position that what HJ*s
>> patch does is OK...
>>
>
>In light of this,  I am resubmitting my patch.  I added 3 more
>testcases
>and also handle:
>
>typedef int v8si __attribute__ ((vector_size (32)));
>
>void
>foo (v8si *out_start, v8si *out_end, v8si *regions)
>{
>    v8si base = regions[3];
>    *out_start = base;
>    *out_end = base;
>}
>
>OK for trunk?

The invoker specified -fno-omit-frame-pointer, why did you eliminate it?  I'd argue it's OK when neither -f nor -fno- is explicitly specified irrespective of the default in case we document the change but an explicit -fno- is pretty clear.

And yes, unfortunate placement of frame pointer init/de-init should be fixed.

Richard.

>
>Thanks.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-08 16:38                         ` H.J. Lu
  2017-08-08 17:01                           ` Richard Biener
@ 2017-08-08 17:05                           ` Uros Bizjak
  1 sibling, 0 replies; 41+ messages in thread
From: Uros Bizjak @ 2017-08-08 17:05 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Arjan van de Ven, Jakub Jelinek, Alexander Monakov, Michael Matz,
	gcc-patches, richard.sandiford

On Tue, Aug 8, 2017 at 6:38 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>>>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>>>>> this optimization removes more than 730
>>>>>
>>>>> pushq %rbp
>>>>> movq %rsp, %rbp
>>>>> popq %rbp
>>>>
>>>> If you don't want the frame pointer, why are you compiling with
>>>> -fno-omit-frame-pointer?  Are you going to add
>>>> -fforce-no-omit-frame-pointer or something similar so that people can
>>>> actually get what they are asking for?  This doesn't really make sense.
>>>> It is perfectly fine to omit frame pointer by default, when it isn't
>>>> required for something, but if the user asks for it, we shouldn't ignore his
>>>> request.
>>>>
>>>
>>>
>>> wanting a framepointer is very nice and desired...  ... but if the
>>> optimizer/ins scheduler moves instructions outside of the frame'd
>>> portion, (it does it for cases like below as well), the value is
>>> already negative for these functions that don't have stack use.
>>>
>>> <MPIDU_Sched_are_pending@@Base>:
>>> mov    all_schedules@@Base-0x38460,%rax
>>> push   %rbp
>>> mov    %rsp,%rbp
>>> pop    %rbp
>>> cmpq   $0x0,(%rax)
>>> setne  %al
>>> movzbl %al,%eax
>>> retq
>>
>> Yeah, and it could be even weirder for big single-block functions.
>> I think GCC has been doing this kind of scheduling of prologue and
>> epilogue instructions for a while, so there hasn*t really been a
>> guarantee which parts of the function will have a new FP and which
>> will still have the old one.
>>
>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1), shrink-wrapping
>> kicks in when the following is compiled with -O3 -fno-omit-frame-pointer:
>>
>>     void f (int *);
>>     void
>>     g (int *x)
>>     {
>>       for (int i = 0; i < 1000; ++i)
>>         x[i] += 1;
>>       if (x[0])
>>         {
>>           int temp;
>>           f (&temp);
>>         }
>>     }
>>
>> so only the block with the call to f sets up FP.  The relatively
>> long-running loop runs with the caller's FP.
>>
>> I hope we can go for a target-independent position that what HJ*s
>> patch does is OK...
>>
>
> In light of this,  I am resubmitting my patch.  I added 3 more testcases
> and also handle:
>
> typedef int v8si __attribute__ ((vector_size (32)));
>
> void
> foo (v8si *out_start, v8si *out_end, v8si *regions)
> {
>     v8si base = regions[3];
>     *out_start = base;
>     *out_end = base;
> }
>
> OK for trunk?

I think that the patch doesn't worsen the situation with FP debugging,
a couple of cases were presented where function operates on the caller
frame. Let's wait a bit for a counter-examples, where the patch hurts
debugging. IMO, the patch is the way to go, as shrink-wrapping is more
toxic than presented patch.

Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-08 17:01                           ` Richard Biener
@ 2017-08-08 17:34                             ` Richard Sandiford
  2017-08-08 17:36                               ` Richard Sandiford
  0 siblings, 1 reply; 41+ messages in thread
From: Richard Sandiford @ 2017-08-08 17:34 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, H.J. Lu, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak

Richard Biener <richard.guenther@gmail.com> writes:
> On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu" <hjl.tools@gmail.com> wrote:
>>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
>><richard.sandiford@linaro.org> wrote:
>>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>>>>>> this optimization removes more than 730
>>>>>>
>>>>>> pushq %rbp
>>>>>> movq %rsp, %rbp
>>>>>> popq %rbp
>>>>>
>>>>> If you don't want the frame pointer, why are you compiling with
>>>>> -fno-omit-frame-pointer?  Are you going to add
>>>>> -fforce-no-omit-frame-pointer or something similar so that people
>>can
>>>>> actually get what they are asking for?  This doesn't really make
>>sense.
>>>>> It is perfectly fine to omit frame pointer by default, when it
>>isn't
>>>>> required for something, but if the user asks for it, we shouldn't
>>ignore his
>>>>> request.
>>>>>
>>>>
>>>>
>>>> wanting a framepointer is very nice and desired...  ... but if the
>>>> optimizer/ins scheduler moves instructions outside of the frame'd
>>>> portion, (it does it for cases like below as well), the value is
>>>> already negative for these functions that don't have stack use.
>>>>
>>>> <MPIDU_Sched_are_pending@@Base>:
>>>> mov    all_schedules@@Base-0x38460,%rax
>>>> push   %rbp
>>>> mov    %rsp,%rbp
>>>> pop    %rbp
>>>> cmpq   $0x0,(%rax)
>>>> setne  %al
>>>> movzbl %al,%eax
>>>> retq
>>>
>>> Yeah, and it could be even weirder for big single-block functions.
>>> I think GCC has been doing this kind of scheduling of prologue and
>>> epilogue instructions for a while, so there hasn*t really been a
>>> guarantee which parts of the function will have a new FP and which
>>> will still have the old one.
>>>
>>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>>shrink-wrapping
>>> kicks in when the following is compiled with -O3
>>-fno-omit-frame-pointer:
>>>
>>>     void f (int *);
>>>     void
>>>     g (int *x)
>>>     {
>>>       for (int i = 0; i < 1000; ++i)
>>>         x[i] += 1;
>>>       if (x[0])
>>>         {
>>>           int temp;
>>>           f (&temp);
>>>         }
>>>     }
>>>
>>> so only the block with the call to f sets up FP.  The relatively
>>> long-running loop runs with the caller's FP.
>>>
>>> I hope we can go for a target-independent position that what HJ*s
>>> patch does is OK...
>>>
>>
>>In light of this,  I am resubmitting my patch.  I added 3 more
>>testcases
>>and also handle:
>>
>>typedef int v8si __attribute__ ((vector_size (32)));
>>
>>void
>>foo (v8si *out_start, v8si *out_end, v8si *regions)
>>{
>>    v8si base = regions[3];
>>    *out_start = base;
>>    *out_end = base;
>>}
>>
>>OK for trunk?
>
> The invoker specified -fno-omit-frame-pointer, why did you eliminate it?
> I'd argue it's OK when neither -f nor -fno- is explicitly specified
> irrespective of the default in case we document the change but an
> explicit -fno- is pretty clear.

I don't buy that we're ignoring the user.  -fomit-frame-pointer says
that, when you're creating a frame, it's OK not to set up the frame
pointer.  Forcing it off means that if you create a frame, you need
to set up the frame pointer too.  But it doesn't say anything about
whether the frame itself is needed.  I.e. it's -fno-omit-frame*-pointer*
rather than -fno-omit-frame.

It seems like the responses have been treating it more like
a combination of:

-fno-shrink-wrapping
-fno-omit-frame-pointer
the equivalent of the old textual prologues and epilogues

but the positive option -fomit-frame-pointer doesn't have any effect
on the last two.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-08 17:34                             ` Richard Sandiford
@ 2017-08-08 17:36                               ` Richard Sandiford
  2017-08-08 18:00                                 ` Richard Biener
  0 siblings, 1 reply; 41+ messages in thread
From: Richard Sandiford @ 2017-08-08 17:36 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, H.J. Lu, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak

Richard Sandiford <richard.sandiford@linaro.org> writes:
> Richard Biener <richard.guenther@gmail.com> writes:
>> On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu" <hjl.tools@gmail.com> wrote:
>>>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
>>><richard.sandiford@linaro.org> wrote:
>>>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>>>> When Linux/x86-64 kernel is compiled with -fno-omit-frame-pointer.
>>>>>>> this optimization removes more than 730
>>>>>>>
>>>>>>> pushq %rbp
>>>>>>> movq %rsp, %rbp
>>>>>>> popq %rbp
>>>>>>
>>>>>> If you don't want the frame pointer, why are you compiling with
>>>>>> -fno-omit-frame-pointer?  Are you going to add
>>>>>> -fforce-no-omit-frame-pointer or something similar so that people
>>>can
>>>>>> actually get what they are asking for?  This doesn't really make
>>>sense.
>>>>>> It is perfectly fine to omit frame pointer by default, when it
>>>isn't
>>>>>> required for something, but if the user asks for it, we shouldn't
>>>ignore his
>>>>>> request.
>>>>>>
>>>>>
>>>>>
>>>>> wanting a framepointer is very nice and desired...  ... but if the
>>>>> optimizer/ins scheduler moves instructions outside of the frame'd
>>>>> portion, (it does it for cases like below as well), the value is
>>>>> already negative for these functions that don't have stack use.
>>>>>
>>>>> <MPIDU_Sched_are_pending@@Base>:
>>>>> mov    all_schedules@@Base-0x38460,%rax
>>>>> push   %rbp
>>>>> mov    %rsp,%rbp
>>>>> pop    %rbp
>>>>> cmpq   $0x0,(%rax)
>>>>> setne  %al
>>>>> movzbl %al,%eax
>>>>> retq
>>>>
>>>> Yeah, and it could be even weirder for big single-block functions.
>>>> I think GCC has been doing this kind of scheduling of prologue and
>>>> epilogue instructions for a while, so there hasn*t really been a
>>>> guarantee which parts of the function will have a new FP and which
>>>> will still have the old one.
>>>>
>>>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>>>shrink-wrapping
>>>> kicks in when the following is compiled with -O3
>>>-fno-omit-frame-pointer:
>>>>
>>>>     void f (int *);
>>>>     void
>>>>     g (int *x)
>>>>     {
>>>>       for (int i = 0; i < 1000; ++i)
>>>>         x[i] += 1;
>>>>       if (x[0])
>>>>         {
>>>>           int temp;
>>>>           f (&temp);
>>>>         }
>>>>     }
>>>>
>>>> so only the block with the call to f sets up FP.  The relatively
>>>> long-running loop runs with the caller's FP.
>>>>
>>>> I hope we can go for a target-independent position that what HJ*s
>>>> patch does is OK...
>>>>
>>>
>>>In light of this,  I am resubmitting my patch.  I added 3 more
>>>testcases
>>>and also handle:
>>>
>>>typedef int v8si __attribute__ ((vector_size (32)));
>>>
>>>void
>>>foo (v8si *out_start, v8si *out_end, v8si *regions)
>>>{
>>>    v8si base = regions[3];
>>>    *out_start = base;
>>>    *out_end = base;
>>>}
>>>
>>>OK for trunk?
>>
>> The invoker specified -fno-omit-frame-pointer, why did you eliminate it?
>> I'd argue it's OK when neither -f nor -fno- is explicitly specified
>> irrespective of the default in case we document the change but an
>> explicit -fno- is pretty clear.
>
> I don't buy that we're ignoring the user.  -fomit-frame-pointer says
> that, when you're creating a frame, it's OK not to set up the frame
> pointer.  Forcing it off means that if you create a frame, you need
> to set up the frame pointer too.  But it doesn't say anything about
> whether the frame itself is needed.  I.e. it's -fno-omit-frame*-pointer*
> rather than -fno-omit-frame.
>
> It seems like the responses have been treating it more like
> a combination of:
>
> -fno-shrink-wrapping
> -fno-omit-frame-pointer
> the equivalent of the old textual prologues and epilogues
>
> but the positive option -fomit-frame-pointer doesn't have any effect
> on the last two.

er, you know what I mean :-)  It doesn't have any effect on
-fshrink-wrapping or the textual-style prologues and epilogues.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-08 17:36                               ` Richard Sandiford
@ 2017-08-08 18:00                                 ` Richard Biener
  2017-08-08 18:29                                   ` H.J. Lu
  2017-08-09  7:53                                   ` Richard Sandiford
  0 siblings, 2 replies; 41+ messages in thread
From: Richard Biener @ 2017-08-08 18:00 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, H.J. Lu, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak

On August 8, 2017 7:36:35 PM GMT+02:00, Richard Sandiford <richard.sandiford@linaro.org> wrote:
>Richard Sandiford <richard.sandiford@linaro.org> writes:
>> Richard Biener <richard.guenther@gmail.com> writes:
>>> On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu"
><hjl.tools@gmail.com> wrote:
>>>>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
>>>><richard.sandiford@linaro.org> wrote:
>>>>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>>>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>>>>> When Linux/x86-64 kernel is compiled with
>-fno-omit-frame-pointer.
>>>>>>>> this optimization removes more than 730
>>>>>>>>
>>>>>>>> pushq %rbp
>>>>>>>> movq %rsp, %rbp
>>>>>>>> popq %rbp
>>>>>>>
>>>>>>> If you don't want the frame pointer, why are you compiling with
>>>>>>> -fno-omit-frame-pointer?  Are you going to add
>>>>>>> -fforce-no-omit-frame-pointer or something similar so that
>people
>>>>can
>>>>>>> actually get what they are asking for?  This doesn't really make
>>>>sense.
>>>>>>> It is perfectly fine to omit frame pointer by default, when it
>>>>isn't
>>>>>>> required for something, but if the user asks for it, we
>shouldn't
>>>>ignore his
>>>>>>> request.
>>>>>>>
>>>>>>
>>>>>>
>>>>>> wanting a framepointer is very nice and desired...  ... but if
>the
>>>>>> optimizer/ins scheduler moves instructions outside of the frame'd
>>>>>> portion, (it does it for cases like below as well), the value is
>>>>>> already negative for these functions that don't have stack use.
>>>>>>
>>>>>> <MPIDU_Sched_are_pending@@Base>:
>>>>>> mov    all_schedules@@Base-0x38460,%rax
>>>>>> push   %rbp
>>>>>> mov    %rsp,%rbp
>>>>>> pop    %rbp
>>>>>> cmpq   $0x0,(%rax)
>>>>>> setne  %al
>>>>>> movzbl %al,%eax
>>>>>> retq
>>>>>
>>>>> Yeah, and it could be even weirder for big single-block functions.
>>>>> I think GCC has been doing this kind of scheduling of prologue and
>>>>> epilogue instructions for a while, so there hasn*t really been a
>>>>> guarantee which parts of the function will have a new FP and which
>>>>> will still have the old one.
>>>>>
>>>>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>>>>shrink-wrapping
>>>>> kicks in when the following is compiled with -O3
>>>>-fno-omit-frame-pointer:
>>>>>
>>>>>     void f (int *);
>>>>>     void
>>>>>     g (int *x)
>>>>>     {
>>>>>       for (int i = 0; i < 1000; ++i)
>>>>>         x[i] += 1;
>>>>>       if (x[0])
>>>>>         {
>>>>>           int temp;
>>>>>           f (&temp);
>>>>>         }
>>>>>     }
>>>>>
>>>>> so only the block with the call to f sets up FP.  The relatively
>>>>> long-running loop runs with the caller's FP.
>>>>>
>>>>> I hope we can go for a target-independent position that what HJ*s
>>>>> patch does is OK...
>>>>>
>>>>
>>>>In light of this,  I am resubmitting my patch.  I added 3 more
>>>>testcases
>>>>and also handle:
>>>>
>>>>typedef int v8si __attribute__ ((vector_size (32)));
>>>>
>>>>void
>>>>foo (v8si *out_start, v8si *out_end, v8si *regions)
>>>>{
>>>>    v8si base = regions[3];
>>>>    *out_start = base;
>>>>    *out_end = base;
>>>>}
>>>>
>>>>OK for trunk?
>>>
>>> The invoker specified -fno-omit-frame-pointer, why did you eliminate
>it?
>>> I'd argue it's OK when neither -f nor -fno- is explicitly specified
>>> irrespective of the default in case we document the change but an
>>> explicit -fno- is pretty clear.
>>
>> I don't buy that we're ignoring the user.  -fomit-frame-pointer says
>> that, when you're creating a frame, it's OK not to set up the frame
>> pointer.  Forcing it off means that if you create a frame, you need
>> to set up the frame pointer too.  But it doesn't say anything about
>> whether the frame itself is needed.  I.e. it's
>-fno-omit-frame*-pointer*
>> rather than -fno-omit-frame.

Isn't that a bit splitting hairs if you look at (past) history?

You could also interpret -fno-omit-frame-pointer as obviously forcing a frame as otherwise there's nothing to omit...

>> It seems like the responses have been treating it more like
>> a combination of:
>>
>> -fno-shrink-wrapping
>> -fno-omit-frame-pointer
>> the equivalent of the old textual prologues and epilogues
>>
>> but the positive option -fomit-frame-pointer doesn't have any effect
>> on the last two.
>
>er, you know what I mean :-)  It doesn't have any effect on
>-fshrink-wrapping or the textual-style prologues and epilogues.

True.  But I think people do not appreciate new options too much if existing ones worked in the past...

Richard.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-08 18:00                                 ` Richard Biener
@ 2017-08-08 18:29                                   ` H.J. Lu
  2017-08-09  7:53                                   ` Richard Sandiford
  1 sibling, 0 replies; 41+ messages in thread
From: H.J. Lu @ 2017-08-08 18:29 UTC (permalink / raw)
  To: Richard Biener
  Cc: Richard Sandiford, GCC Patches, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak

On Tue, Aug 8, 2017 at 11:00 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On August 8, 2017 7:36:35 PM GMT+02:00, Richard Sandiford <richard.sandiford@linaro.org> wrote:
>>Richard Sandiford <richard.sandiford@linaro.org> writes:
>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>> On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu"
>><hjl.tools@gmail.com> wrote:
>>>>>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
>>>>><richard.sandiford@linaro.org> wrote:
>>>>>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>>>>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>>>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>>>>>> When Linux/x86-64 kernel is compiled with
>>-fno-omit-frame-pointer.
>>>>>>>>> this optimization removes more than 730
>>>>>>>>>
>>>>>>>>> pushq %rbp
>>>>>>>>> movq %rsp, %rbp
>>>>>>>>> popq %rbp
>>>>>>>>
>>>>>>>> If you don't want the frame pointer, why are you compiling with
>>>>>>>> -fno-omit-frame-pointer?  Are you going to add
>>>>>>>> -fforce-no-omit-frame-pointer or something similar so that
>>people
>>>>>can
>>>>>>>> actually get what they are asking for?  This doesn't really make
>>>>>sense.
>>>>>>>> It is perfectly fine to omit frame pointer by default, when it
>>>>>isn't
>>>>>>>> required for something, but if the user asks for it, we
>>shouldn't
>>>>>ignore his
>>>>>>>> request.
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> wanting a framepointer is very nice and desired...  ... but if
>>the
>>>>>>> optimizer/ins scheduler moves instructions outside of the frame'd
>>>>>>> portion, (it does it for cases like below as well), the value is
>>>>>>> already negative for these functions that don't have stack use.
>>>>>>>
>>>>>>> <MPIDU_Sched_are_pending@@Base>:
>>>>>>> mov    all_schedules@@Base-0x38460,%rax
>>>>>>> push   %rbp
>>>>>>> mov    %rsp,%rbp
>>>>>>> pop    %rbp
>>>>>>> cmpq   $0x0,(%rax)
>>>>>>> setne  %al
>>>>>>> movzbl %al,%eax
>>>>>>> retq
>>>>>>
>>>>>> Yeah, and it could be even weirder for big single-block functions.
>>>>>> I think GCC has been doing this kind of scheduling of prologue and
>>>>>> epilogue instructions for a while, so there hasn*t really been a
>>>>>> guarantee which parts of the function will have a new FP and which
>>>>>> will still have the old one.
>>>>>>
>>>>>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>>>>>shrink-wrapping
>>>>>> kicks in when the following is compiled with -O3
>>>>>-fno-omit-frame-pointer:
>>>>>>
>>>>>>     void f (int *);
>>>>>>     void
>>>>>>     g (int *x)
>>>>>>     {
>>>>>>       for (int i = 0; i < 1000; ++i)
>>>>>>         x[i] += 1;
>>>>>>       if (x[0])
>>>>>>         {
>>>>>>           int temp;
>>>>>>           f (&temp);
>>>>>>         }
>>>>>>     }
>>>>>>
>>>>>> so only the block with the call to f sets up FP.  The relatively
>>>>>> long-running loop runs with the caller's FP.
>>>>>>
>>>>>> I hope we can go for a target-independent position that what HJ*s
>>>>>> patch does is OK...
>>>>>>
>>>>>
>>>>>In light of this,  I am resubmitting my patch.  I added 3 more
>>>>>testcases
>>>>>and also handle:
>>>>>
>>>>>typedef int v8si __attribute__ ((vector_size (32)));
>>>>>
>>>>>void
>>>>>foo (v8si *out_start, v8si *out_end, v8si *regions)
>>>>>{
>>>>>    v8si base = regions[3];
>>>>>    *out_start = base;
>>>>>    *out_end = base;
>>>>>}
>>>>>
>>>>>OK for trunk?
>>>>
>>>> The invoker specified -fno-omit-frame-pointer, why did you eliminate
>>it?
>>>> I'd argue it's OK when neither -f nor -fno- is explicitly specified
>>>> irrespective of the default in case we document the change but an
>>>> explicit -fno- is pretty clear.
>>>
>>> I don't buy that we're ignoring the user.  -fomit-frame-pointer says
>>> that, when you're creating a frame, it's OK not to set up the frame
>>> pointer.  Forcing it off means that if you create a frame, you need
>>> to set up the frame pointer too.  But it doesn't say anything about
>>> whether the frame itself is needed.  I.e. it's
>>-fno-omit-frame*-pointer*
>>> rather than -fno-omit-frame.
>
> Isn't that a bit splitting hairs if you look at (past) history?
>
> You could also interpret -fno-omit-frame-pointer as obviously forcing a frame as otherwise there's nothing to omit...
>
>>> It seems like the responses have been treating it more like
>>> a combination of:
>>>
>>> -fno-shrink-wrapping
>>> -fno-omit-frame-pointer
>>> the equivalent of the old textual prologues and epilogues
>>>
>>> but the positive option -fomit-frame-pointer doesn't have any effect
>>> on the last two.
>>
>>er, you know what I mean :-)  It doesn't have any effect on
>>-fshrink-wrapping or the textual-style prologues and epilogues.
>
> True.  But I think people do not appreciate new options too much if existing ones worked in the past...
>

Should we also disable LTO and function inlining with -fno-omit-frame-pointer?
Both of them may eliminate frame pointer.

-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-08 18:00                                 ` Richard Biener
  2017-08-08 18:29                                   ` H.J. Lu
@ 2017-08-09  7:53                                   ` Richard Sandiford
  2017-08-09 11:22                                     ` Richard Biener
  1 sibling, 1 reply; 41+ messages in thread
From: Richard Sandiford @ 2017-08-09  7:53 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, H.J. Lu, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak

Richard Biener <richard.guenther@gmail.com> writes:
> On August 8, 2017 7:36:35 PM GMT+02:00, Richard Sandiford
> <richard.sandiford@linaro.org> wrote:
>>Richard Sandiford <richard.sandiford@linaro.org> writes:
>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>> On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu"
>><hjl.tools@gmail.com> wrote:
>>>>>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
>>>>><richard.sandiford@linaro.org> wrote:
>>>>>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>>>>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>>>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>>>>>> When Linux/x86-64 kernel is compiled with
>>-fno-omit-frame-pointer.
>>>>>>>>> this optimization removes more than 730
>>>>>>>>>
>>>>>>>>> pushq %rbp
>>>>>>>>> movq %rsp, %rbp
>>>>>>>>> popq %rbp
>>>>>>>>
>>>>>>>> If you don't want the frame pointer, why are you compiling with
>>>>>>>> -fno-omit-frame-pointer?  Are you going to add
>>>>>>>> -fforce-no-omit-frame-pointer or something similar so that
>>people
>>>>>can
>>>>>>>> actually get what they are asking for?  This doesn't really make
>>>>>sense.
>>>>>>>> It is perfectly fine to omit frame pointer by default, when it
>>>>>isn't
>>>>>>>> required for something, but if the user asks for it, we
>>shouldn't
>>>>>ignore his
>>>>>>>> request.
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> wanting a framepointer is very nice and desired...  ... but if
>>the
>>>>>>> optimizer/ins scheduler moves instructions outside of the frame'd
>>>>>>> portion, (it does it for cases like below as well), the value is
>>>>>>> already negative for these functions that don't have stack use.
>>>>>>>
>>>>>>> <MPIDU_Sched_are_pending@@Base>:
>>>>>>> mov    all_schedules@@Base-0x38460,%rax
>>>>>>> push   %rbp
>>>>>>> mov    %rsp,%rbp
>>>>>>> pop    %rbp
>>>>>>> cmpq   $0x0,(%rax)
>>>>>>> setne  %al
>>>>>>> movzbl %al,%eax
>>>>>>> retq
>>>>>>
>>>>>> Yeah, and it could be even weirder for big single-block functions.
>>>>>> I think GCC has been doing this kind of scheduling of prologue and
>>>>>> epilogue instructions for a while, so there hasn*t really been a
>>>>>> guarantee which parts of the function will have a new FP and which
>>>>>> will still have the old one.
>>>>>>
>>>>>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>>>>>shrink-wrapping
>>>>>> kicks in when the following is compiled with -O3
>>>>>-fno-omit-frame-pointer:
>>>>>>
>>>>>>     void f (int *);
>>>>>>     void
>>>>>>     g (int *x)
>>>>>>     {
>>>>>>       for (int i = 0; i < 1000; ++i)
>>>>>>         x[i] += 1;
>>>>>>       if (x[0])
>>>>>>         {
>>>>>>           int temp;
>>>>>>           f (&temp);
>>>>>>         }
>>>>>>     }
>>>>>>
>>>>>> so only the block with the call to f sets up FP.  The relatively
>>>>>> long-running loop runs with the caller's FP.
>>>>>>
>>>>>> I hope we can go for a target-independent position that what HJ*s
>>>>>> patch does is OK...
>>>>>>
>>>>>
>>>>>In light of this,  I am resubmitting my patch.  I added 3 more
>>>>>testcases
>>>>>and also handle:
>>>>>
>>>>>typedef int v8si __attribute__ ((vector_size (32)));
>>>>>
>>>>>void
>>>>>foo (v8si *out_start, v8si *out_end, v8si *regions)
>>>>>{
>>>>>    v8si base = regions[3];
>>>>>    *out_start = base;
>>>>>    *out_end = base;
>>>>>}
>>>>>
>>>>>OK for trunk?
>>>>
>>>> The invoker specified -fno-omit-frame-pointer, why did you eliminate
>>it?
>>>> I'd argue it's OK when neither -f nor -fno- is explicitly specified
>>>> irrespective of the default in case we document the change but an
>>>> explicit -fno- is pretty clear.
>>>
>>> I don't buy that we're ignoring the user.  -fomit-frame-pointer says
>>> that, when you're creating a frame, it's OK not to set up the frame
>>> pointer.  Forcing it off means that if you create a frame, you need
>>> to set up the frame pointer too.  But it doesn't say anything about
>>> whether the frame itself is needed.  I.e. it's
>>-fno-omit-frame*-pointer*
>>> rather than -fno-omit-frame.
>
> Isn't that a bit splitting hairs if you look at (past) history?

I guess it would have been splitting hairs in the days when they
amounted to the same thing, i.e. when there was no behaviour that
would match "-fomit-frame" and when the prologue and epilogue were
glued to the start and end of the function.  But that was quite a
long time ago.  Shrink-wrapping at least means that omitting the frame
and omitting the frame pointer are different things, and it seems
fair that -fomit-frame-pointer has followed the natural meaning.

> You could also interpret -fno-omit-frame-pointer as obviously forcing a
> frame as otherwise there's nothing to omit...

But applying that kind of interpretation to something like
-maccumulate-outgoing-args would make inlining all calls within a
function invalid, since there'd no longer be arguments to accumulate.

I think this kind of disagreement just emphasises that if we really
need a "always emit a prologue at the very start, an epilogue at the
very end, and always use a frame pointer" option, we should add it
and document exactly what the guarantees are.  I don't think
-fno-omit-frame-pointer should be it, since as the replies earlier in
the thread said, the natural meaning of that option has its uses too.

Thanks,
Richard

>
>>> It seems like the responses have been treating it more like
>>> a combination of:
>>>
>>> -fno-shrink-wrapping
>>> -fno-omit-frame-pointer
>>> the equivalent of the old textual prologues and epilogues
>>>
>>> but the positive option -fomit-frame-pointer doesn't have any effect
>>> on the last two.
>>
>>er, you know what I mean :-)  It doesn't have any effect on
>>-fshrink-wrapping or the textual-style prologues and epilogues.
>
> True.  But I think people do not appreciate new options too much if
> existing ones worked in the past...
>
> Richard.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09  7:53                                   ` Richard Sandiford
@ 2017-08-09 11:22                                     ` Richard Biener
  2017-08-09 11:31                                       ` H.J. Lu
  0 siblings, 1 reply; 41+ messages in thread
From: Richard Biener @ 2017-08-09 11:22 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, H.J. Lu, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak

On August 9, 2017 9:53:05 AM GMT+02:00, Richard Sandiford <richard.sandiford@linaro.org> wrote:
>Richard Biener <richard.guenther@gmail.com> writes:
>> On August 8, 2017 7:36:35 PM GMT+02:00, Richard Sandiford
>> <richard.sandiford@linaro.org> wrote:
>>>Richard Sandiford <richard.sandiford@linaro.org> writes:
>>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>>> On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu"
>>><hjl.tools@gmail.com> wrote:
>>>>>>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
>>>>>><richard.sandiford@linaro.org> wrote:
>>>>>>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>>>>>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>>>>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>>>>>>> When Linux/x86-64 kernel is compiled with
>>>-fno-omit-frame-pointer.
>>>>>>>>>> this optimization removes more than 730
>>>>>>>>>>
>>>>>>>>>> pushq %rbp
>>>>>>>>>> movq %rsp, %rbp
>>>>>>>>>> popq %rbp
>>>>>>>>>
>>>>>>>>> If you don't want the frame pointer, why are you compiling
>with
>>>>>>>>> -fno-omit-frame-pointer?  Are you going to add
>>>>>>>>> -fforce-no-omit-frame-pointer or something similar so that
>>>people
>>>>>>can
>>>>>>>>> actually get what they are asking for?  This doesn't really
>make
>>>>>>sense.
>>>>>>>>> It is perfectly fine to omit frame pointer by default, when it
>>>>>>isn't
>>>>>>>>> required for something, but if the user asks for it, we
>>>shouldn't
>>>>>>ignore his
>>>>>>>>> request.
>>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> wanting a framepointer is very nice and desired...  ... but if
>>>the
>>>>>>>> optimizer/ins scheduler moves instructions outside of the
>frame'd
>>>>>>>> portion, (it does it for cases like below as well), the value
>is
>>>>>>>> already negative for these functions that don't have stack use.
>>>>>>>>
>>>>>>>> <MPIDU_Sched_are_pending@@Base>:
>>>>>>>> mov    all_schedules@@Base-0x38460,%rax
>>>>>>>> push   %rbp
>>>>>>>> mov    %rsp,%rbp
>>>>>>>> pop    %rbp
>>>>>>>> cmpq   $0x0,(%rax)
>>>>>>>> setne  %al
>>>>>>>> movzbl %al,%eax
>>>>>>>> retq
>>>>>>>
>>>>>>> Yeah, and it could be even weirder for big single-block
>functions.
>>>>>>> I think GCC has been doing this kind of scheduling of prologue
>and
>>>>>>> epilogue instructions for a while, so there hasn*t really been a
>>>>>>> guarantee which parts of the function will have a new FP and
>which
>>>>>>> will still have the old one.
>>>>>>>
>>>>>>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>>>>>>shrink-wrapping
>>>>>>> kicks in when the following is compiled with -O3
>>>>>>-fno-omit-frame-pointer:
>>>>>>>
>>>>>>>     void f (int *);
>>>>>>>     void
>>>>>>>     g (int *x)
>>>>>>>     {
>>>>>>>       for (int i = 0; i < 1000; ++i)
>>>>>>>         x[i] += 1;
>>>>>>>       if (x[0])
>>>>>>>         {
>>>>>>>           int temp;
>>>>>>>           f (&temp);
>>>>>>>         }
>>>>>>>     }
>>>>>>>
>>>>>>> so only the block with the call to f sets up FP.  The relatively
>>>>>>> long-running loop runs with the caller's FP.
>>>>>>>
>>>>>>> I hope we can go for a target-independent position that what
>HJ*s
>>>>>>> patch does is OK...
>>>>>>>
>>>>>>
>>>>>>In light of this,  I am resubmitting my patch.  I added 3 more
>>>>>>testcases
>>>>>>and also handle:
>>>>>>
>>>>>>typedef int v8si __attribute__ ((vector_size (32)));
>>>>>>
>>>>>>void
>>>>>>foo (v8si *out_start, v8si *out_end, v8si *regions)
>>>>>>{
>>>>>>    v8si base = regions[3];
>>>>>>    *out_start = base;
>>>>>>    *out_end = base;
>>>>>>}
>>>>>>
>>>>>>OK for trunk?
>>>>>
>>>>> The invoker specified -fno-omit-frame-pointer, why did you
>eliminate
>>>it?
>>>>> I'd argue it's OK when neither -f nor -fno- is explicitly
>specified
>>>>> irrespective of the default in case we document the change but an
>>>>> explicit -fno- is pretty clear.
>>>>
>>>> I don't buy that we're ignoring the user.  -fomit-frame-pointer
>says
>>>> that, when you're creating a frame, it's OK not to set up the frame
>>>> pointer.  Forcing it off means that if you create a frame, you need
>>>> to set up the frame pointer too.  But it doesn't say anything about
>>>> whether the frame itself is needed.  I.e. it's
>>>-fno-omit-frame*-pointer*
>>>> rather than -fno-omit-frame.
>>
>> Isn't that a bit splitting hairs if you look at (past) history?
>
>I guess it would have been splitting hairs in the days when they
>amounted to the same thing, i.e. when there was no behaviour that
>would match "-fomit-frame" and when the prologue and epilogue were
>glued to the start and end of the function.  But that was quite a
>long time ago.  Shrink-wrapping at least means that omitting the frame
>and omitting the frame pointer are different things, and it seems
>fair that -fomit-frame-pointer has followed the natural meaning.
>
>> You could also interpret -fno-omit-frame-pointer as obviously forcing
>a
>> frame as otherwise there's nothing to omit...
>
>But applying that kind of interpretation to something like
>-maccumulate-outgoing-args would make inlining all calls within a
>function invalid, since there'd no longer be arguments to accumulate.
>
>I think this kind of disagreement just emphasises that if we really
>need a "always emit a prologue at the very start, an epilogue at the
>very end, and always use a frame pointer" option, we should add it
>and document exactly what the guarantees are.  I don't think
>-fno-omit-frame-pointer should be it, since as the replies earlier in
>the thread said, the natural meaning of that option has its uses too.

OK, but then both -f[no-]omit-frame-pointer do not have clearly defined semantics and thus shouldn't be exposed to the user?

Richard.

>Thanks,
>Richard
>
>>
>>>> It seems like the responses have been treating it more like
>>>> a combination of:
>>>>
>>>> -fno-shrink-wrapping
>>>> -fno-omit-frame-pointer
>>>> the equivalent of the old textual prologues and epilogues
>>>>
>>>> but the positive option -fomit-frame-pointer doesn't have any
>effect
>>>> on the last two.
>>>
>>>er, you know what I mean :-)  It doesn't have any effect on
>>>-fshrink-wrapping or the textual-style prologues and epilogues.
>>
>> True.  But I think people do not appreciate new options too much if
>> existing ones worked in the past...
>>
>> Richard.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 11:22                                     ` Richard Biener
@ 2017-08-09 11:31                                       ` H.J. Lu
  2017-08-09 11:59                                         ` Michael Matz
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-09 11:31 UTC (permalink / raw)
  To: Richard Biener
  Cc: Richard Sandiford, GCC Patches, Arjan van de Ven, Jakub Jelinek,
	Alexander Monakov, Michael Matz, Uros Bizjak

On Wed, Aug 9, 2017 at 4:22 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On August 9, 2017 9:53:05 AM GMT+02:00, Richard Sandiford <richard.sandiford@linaro.org> wrote:
>>Richard Biener <richard.guenther@gmail.com> writes:
>>> On August 8, 2017 7:36:35 PM GMT+02:00, Richard Sandiford
>>> <richard.sandiford@linaro.org> wrote:
>>>>Richard Sandiford <richard.sandiford@linaro.org> writes:
>>>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>>>> On August 8, 2017 6:38:30 PM GMT+02:00, "H.J. Lu"
>>>><hjl.tools@gmail.com> wrote:
>>>>>>>On Mon, Aug 7, 2017 at 1:05 PM, Richard Sandiford
>>>>>>><richard.sandiford@linaro.org> wrote:
>>>>>>>> Arjan van de Ven <arjan@linux.intel.com> writes:
>>>>>>>>> On 8/7/2017 8:43 AM, Jakub Jelinek wrote:
>>>>>>>>>> On Mon, Aug 07, 2017 at 08:39:24AM -0700, H.J. Lu wrote:
>>>>>>>>>>> When Linux/x86-64 kernel is compiled with
>>>>-fno-omit-frame-pointer.
>>>>>>>>>>> this optimization removes more than 730
>>>>>>>>>>>
>>>>>>>>>>> pushq %rbp
>>>>>>>>>>> movq %rsp, %rbp
>>>>>>>>>>> popq %rbp
>>>>>>>>>>
>>>>>>>>>> If you don't want the frame pointer, why are you compiling
>>with
>>>>>>>>>> -fno-omit-frame-pointer?  Are you going to add
>>>>>>>>>> -fforce-no-omit-frame-pointer or something similar so that
>>>>people
>>>>>>>can
>>>>>>>>>> actually get what they are asking for?  This doesn't really
>>make
>>>>>>>sense.
>>>>>>>>>> It is perfectly fine to omit frame pointer by default, when it
>>>>>>>isn't
>>>>>>>>>> required for something, but if the user asks for it, we
>>>>shouldn't
>>>>>>>ignore his
>>>>>>>>>> request.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> wanting a framepointer is very nice and desired...  ... but if
>>>>the
>>>>>>>>> optimizer/ins scheduler moves instructions outside of the
>>frame'd
>>>>>>>>> portion, (it does it for cases like below as well), the value
>>is
>>>>>>>>> already negative for these functions that don't have stack use.
>>>>>>>>>
>>>>>>>>> <MPIDU_Sched_are_pending@@Base>:
>>>>>>>>> mov    all_schedules@@Base-0x38460,%rax
>>>>>>>>> push   %rbp
>>>>>>>>> mov    %rsp,%rbp
>>>>>>>>> pop    %rbp
>>>>>>>>> cmpq   $0x0,(%rax)
>>>>>>>>> setne  %al
>>>>>>>>> movzbl %al,%eax
>>>>>>>>> retq
>>>>>>>>
>>>>>>>> Yeah, and it could be even weirder for big single-block
>>functions.
>>>>>>>> I think GCC has been doing this kind of scheduling of prologue
>>and
>>>>>>>> epilogue instructions for a while, so there hasn*t really been a
>>>>>>>> guarantee which parts of the function will have a new FP and
>>which
>>>>>>>> will still have the old one.
>>>>>>>>
>>>>>>>> Also, with an arbitrarily-picked host compiler (GCC 6.3.1),
>>>>>>>shrink-wrapping
>>>>>>>> kicks in when the following is compiled with -O3
>>>>>>>-fno-omit-frame-pointer:
>>>>>>>>
>>>>>>>>     void f (int *);
>>>>>>>>     void
>>>>>>>>     g (int *x)
>>>>>>>>     {
>>>>>>>>       for (int i = 0; i < 1000; ++i)
>>>>>>>>         x[i] += 1;
>>>>>>>>       if (x[0])
>>>>>>>>         {
>>>>>>>>           int temp;
>>>>>>>>           f (&temp);
>>>>>>>>         }
>>>>>>>>     }
>>>>>>>>
>>>>>>>> so only the block with the call to f sets up FP.  The relatively
>>>>>>>> long-running loop runs with the caller's FP.
>>>>>>>>
>>>>>>>> I hope we can go for a target-independent position that what
>>HJ*s
>>>>>>>> patch does is OK...
>>>>>>>>
>>>>>>>
>>>>>>>In light of this,  I am resubmitting my patch.  I added 3 more
>>>>>>>testcases
>>>>>>>and also handle:
>>>>>>>
>>>>>>>typedef int v8si __attribute__ ((vector_size (32)));
>>>>>>>
>>>>>>>void
>>>>>>>foo (v8si *out_start, v8si *out_end, v8si *regions)
>>>>>>>{
>>>>>>>    v8si base = regions[3];
>>>>>>>    *out_start = base;
>>>>>>>    *out_end = base;
>>>>>>>}
>>>>>>>
>>>>>>>OK for trunk?
>>>>>>
>>>>>> The invoker specified -fno-omit-frame-pointer, why did you
>>eliminate
>>>>it?
>>>>>> I'd argue it's OK when neither -f nor -fno- is explicitly
>>specified
>>>>>> irrespective of the default in case we document the change but an
>>>>>> explicit -fno- is pretty clear.
>>>>>
>>>>> I don't buy that we're ignoring the user.  -fomit-frame-pointer
>>says
>>>>> that, when you're creating a frame, it's OK not to set up the frame
>>>>> pointer.  Forcing it off means that if you create a frame, you need
>>>>> to set up the frame pointer too.  But it doesn't say anything about
>>>>> whether the frame itself is needed.  I.e. it's
>>>>-fno-omit-frame*-pointer*
>>>>> rather than -fno-omit-frame.
>>>
>>> Isn't that a bit splitting hairs if you look at (past) history?
>>
>>I guess it would have been splitting hairs in the days when they
>>amounted to the same thing, i.e. when there was no behaviour that
>>would match "-fomit-frame" and when the prologue and epilogue were
>>glued to the start and end of the function.  But that was quite a
>>long time ago.  Shrink-wrapping at least means that omitting the frame
>>and omitting the frame pointer are different things, and it seems
>>fair that -fomit-frame-pointer has followed the natural meaning.
>>
>>> You could also interpret -fno-omit-frame-pointer as obviously forcing
>>a
>>> frame as otherwise there's nothing to omit...
>>
>>But applying that kind of interpretation to something like
>>-maccumulate-outgoing-args would make inlining all calls within a
>>function invalid, since there'd no longer be arguments to accumulate.
>>
>>I think this kind of disagreement just emphasises that if we really
>>need a "always emit a prologue at the very start, an epilogue at the
>>very end, and always use a frame pointer" option, we should add it
>>and document exactly what the guarantees are.  I don't think
>>-fno-omit-frame-pointer should be it, since as the replies earlier in
>>the thread said, the natural meaning of that option has its uses too.
>
> OK, but then both -f[no-]omit-frame-pointer do not have clearly defined semantics and thus shouldn't be exposed to the user?
>

-f[no-]omit-frame-pointer apply to cases where a new stack frame
is needed.  -fno-omit-frame-pointer allows you to unwind each
stack frame, not necessarily each function, via frame pointer.
 -fno-omit-frame-pointer may not create a new stack frame for
each function, similar to LTO or function inlining.  But you can still
unwind via frame pointer.  We never guarantee that we will create
a new stack frame for each function.  Some functions are inlined
completely.  Some just use the caller's stack frame.

-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 11:31                                       ` H.J. Lu
@ 2017-08-09 11:59                                         ` Michael Matz
  2017-08-09 12:27                                           ` H.J. Lu
  0 siblings, 1 reply; 41+ messages in thread
From: Michael Matz @ 2017-08-09 11:59 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Richard Biener, Richard Sandiford, GCC Patches, Arjan van de Ven,
	Jakub Jelinek, Alexander Monakov, Uros Bizjak

Hi,

On Wed, 9 Aug 2017, H.J. Lu wrote:

> > OK, but then both -f[no-]omit-frame-pointer do not have clearly defined semantics and thus shouldn't be exposed to the user?
> 
> -f[no-]omit-frame-pointer apply to cases where a new stack frame
> is needed.  -fno-omit-frame-pointer allows you to unwind each
> stack frame, not necessarily each function, via frame pointer.
>  -fno-omit-frame-pointer may not create a new stack frame for
> each function, similar to LTO or function inlining.  But you can still
> unwind via frame pointer.  We never guarantee that we will create
> a new stack frame for each function.  Some functions are inlined
> completely.  Some just use the caller's stack frame.

Maybe something along these lines should be added to the docu of 
fno-omit-frame-pointer then.  At least "Note that -fno-omit-frame-pointer 
doesn't force a frame for all functions if it isn't otherwise needed, and 
hence doesn't guarantee a frame pointer for all functions." .


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 11:59                                         ` Michael Matz
@ 2017-08-09 12:27                                           ` H.J. Lu
  2017-08-09 15:04                                             ` Andi Kleen
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-09 12:27 UTC (permalink / raw)
  To: Michael Matz
  Cc: Richard Biener, Richard Sandiford, GCC Patches, Arjan van de Ven,
	Jakub Jelinek, Alexander Monakov, Uros Bizjak

[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]

On Wed, Aug 9, 2017 at 4:59 AM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> On Wed, 9 Aug 2017, H.J. Lu wrote:
>
>> > OK, but then both -f[no-]omit-frame-pointer do not have clearly defined semantics and thus shouldn't be exposed to the user?
>>
>> -f[no-]omit-frame-pointer apply to cases where a new stack frame
>> is needed.  -fno-omit-frame-pointer allows you to unwind each
>> stack frame, not necessarily each function, via frame pointer.
>>  -fno-omit-frame-pointer may not create a new stack frame for
>> each function, similar to LTO or function inlining.  But you can still
>> unwind via frame pointer.  We never guarantee that we will create
>> a new stack frame for each function.  Some functions are inlined
>> completely.  Some just use the caller's stack frame.
>
> Maybe something along these lines should be added to the docu of
> fno-omit-frame-pointer then.  At least "Note that -fno-omit-frame-pointer
> doesn't force a frame for all functions if it isn't otherwise needed, and
> hence doesn't guarantee a frame pointer for all functions." .
>

Like this?

Note that @option{-fno-omit-frame-pointer} doesn't force a new stack
frame for all functions if it isn't otherwise needed, and hence doesn't
guarantee a new frame pointer for all functions.

Here is the updated patch with a note for -fno-omit-frame-pointer.

OK for trunk?

-- 
H.J.

[-- Attachment #2: 0001-i386-Don-t-use-frame-pointer-without-stack-access.patch --]
[-- Type: text/x-patch, Size: 8996 bytes --]

From 4f6197699ab0904671919187c71d4a54594c1431 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 6 Aug 2017 06:24:36 -0700
Subject: [PATCH] i386: Don't use frame pointer without stack access

When there is no stack access, there is no need to use frame pointer
even if -fno-omit-frame-pointer is used and caller's frame pointer is
unchanged.

gcc/

	PR target/81736
	* config/i386/i386.c (ix86_finalize_stack_realign_flags): Renamed
	to ...
	(ix86_finalize_stack_frame_flags): This.  Also clear
	frame_pointer_needed if -fno-omit-frame-pointer is used without
	stack access.
	(ix86_expand_prologue): Replace ix86_finalize_stack_realign_flags
	with ix86_finalize_stack_frame_flags.
	(ix86_expand_epilogue): Likewise.
	(ix86_expand_split_stack_prologue): Likewise.
	* doc/invoke.texi: Add a note for -fno-omit-frame-pointer.

gcc/testsuite/

	PR target/81736
	* gcc.target/i386/pr81736-1.c: New test.
	* gcc.target/i386/pr81736-2.c: Likewise.
	* gcc.target/i386/pr81736-3.c: Likewise.
	* gcc.target/i386/pr81736-4.c: Likewise.
	* gcc.target/i386/pr81736-5.c: Likewise.
	* gcc.target/i386/pr81736-6.c: Likewise.
	* gcc.target/i386/pr81736-7.c: Likewise.
---
 gcc/config/i386/i386.c                    | 23 ++++++++++++-----------
 gcc/doc/invoke.texi                       |  4 ++++
 gcc/testsuite/gcc.target/i386/pr81736-1.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-2.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-3.c | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-4.c | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-5.c | 20 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-6.c | 16 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-7.c | 13 +++++++++++++
 9 files changed, 114 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-7.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3f8519777f7..85aa4d4fbf5 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14179,10 +14179,11 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
-/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
-   to be generated in correct form.  */
+/* Finalize stack_realign_needed and frame_pointer_needed flags, which
+   will guide prologue/epilogue to be generated in correct form.  */
+
 static void
-ix86_finalize_stack_realign_flags (void)
+ix86_finalize_stack_frame_flags (void)
 {
   /* Check if stack realign is really needed after reload, and
      stores result in cfun */
@@ -14205,13 +14206,13 @@ ix86_finalize_stack_realign_flags (void)
     }
 
   /* If the only reason for frame_pointer_needed is that we conservatively
-     assumed stack realignment might be needed, but in the end nothing that
-     needed the stack alignment had been spilled, clear frame_pointer_needed
-     and say we don't need stack realignment.  */
-  if (stack_realign
+     assumed stack realignment might be needed or -fno-omit-frame-pointer
+     is used, but in the end nothing that needed the stack alignment had
+     been spilled nor stack access, clear frame_pointer_needed and say we
+     don't need stack realignment.  */
+  if ((stack_realign || !flag_omit_frame_pointer)
       && frame_pointer_needed
       && crtl->is_leaf
-      && flag_omit_frame_pointer
       && crtl->sp_is_unchanging
       && !ix86_current_function_calls_tls_descriptor
       && !crtl->accesses_prior_frames
@@ -14402,7 +14403,7 @@ ix86_expand_prologue (void)
   if (ix86_function_naked (current_function_decl))
     return;
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
 
   /* DRAP should not coexist with stack_realign_fp */
   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
@@ -15266,7 +15267,7 @@ ix86_expand_epilogue (int style)
       return;
     }
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = m->frame;
 
   m->fs.sp_realigned = stack_realign_fp;
@@ -15801,7 +15802,7 @@ ix86_expand_split_stack_prologue (void)
 
   gcc_assert (flag_split_stack && reload_completed);
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = cfun->machine->frame;
   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index cc0f5a00a4f..3753d8a992b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7366,6 +7366,10 @@ size) for 32-bit GNU/Linux x86 and 32-bit Darwin x86 targets is
 @option{-fomit-frame-pointer}.  You can configure GCC with the
 @option{--enable-frame-pointer} configure option to change the default.
 
+Note that @option{-fno-omit-frame-pointer} doesn't force a new stack
+frame for all functions if it isn't otherwise needed, and hence doesn't
+guarantee a new frame pointer for all functions.
+
 Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}.
 
 @item -foptimize-sibling-calls
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1.c b/gcc/testsuite/gcc.target/i386/pr81736-1.c
new file mode 100644
index 00000000000..92c7bc97a0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int i;
+
+int
+foo (void)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-2.c b/gcc/testsuite/gcc.target/i386/pr81736-2.c
new file mode 100644
index 00000000000..a3720879937
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (int i)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-3.c b/gcc/testsuite/gcc.target/i386/pr81736-3.c
new file mode 100644
index 00000000000..c3bde7dd933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+void
+foo (void)
+{
+  asm ("# " : : : "ebx");
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-4.c b/gcc/testsuite/gcc.target/i386/pr81736-4.c
new file mode 100644
index 00000000000..25f50016a64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+foo (int i1, int i2, int i3, int i4, int i5, int i6, int i7)
+{
+  return i7;
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-5.c b/gcc/testsuite/gcc.target/i386/pr81736-5.c
new file mode 100644
index 00000000000..e1602cf25ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -mavx" } */
+
+typedef int v8si __attribute__ ((vector_size (32)));
+
+void
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (v8si *out_start, v8si *out_end, v8si *regions)
+{
+  v8si base = regions[3];
+  *out_start = base;
+  *out_end = base;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
+/* Verify no dynamic realignment is performed.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-6.c b/gcc/testsuite/gcc.target/i386/pr81736-6.c
new file mode 100644
index 00000000000..6198574c8cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+struct foo
+{
+  int head;
+} a;
+
+int
+bar (void)
+{
+  return a.head != 0;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-7.c b/gcc/testsuite/gcc.target/i386/pr81736-7.c
new file mode 100644
index 00000000000..f947886e642
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int foo (void);
+
+int
+bar (void)
+{
+  return foo ();
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
-- 
2.13.4


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 12:27                                           ` H.J. Lu
@ 2017-08-09 15:04                                             ` Andi Kleen
  2017-08-09 15:05                                               ` Arjan van de Ven
  0 siblings, 1 reply; 41+ messages in thread
From: Andi Kleen @ 2017-08-09 15:04 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Michael Matz, Richard Biener, Richard Sandiford, GCC Patches,
	Arjan van de Ven, Jakub Jelinek, Alexander Monakov, Uros Bizjak

"H.J. Lu" <hjl.tools@gmail.com> writes:
>
> Like this?
>
> Note that @option{-fno-omit-frame-pointer} doesn't force a new stack
> frame for all functions if it isn't otherwise needed, and hence doesn't
> guarantee a new frame pointer for all functions.
>
> Here is the updated patch with a note for -fno-omit-frame-pointer.
>
> OK for trunk?

I suspect there will be still some unwinder or code patching
setups which rely on frame pointer everywhere become broken. But
doing the optimization for -fno-omit-frame-pointer by default
seems reasonable.

I would add a new option
-fforce-frame-pointer
that gives the old -fno-omit-frame-pointer back, so that
users relying on frame pointers everywhere have a workaround.

-Andi

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 15:04                                             ` Andi Kleen
@ 2017-08-09 15:05                                               ` Arjan van de Ven
  2017-08-09 15:14                                                 ` H.J. Lu
  0 siblings, 1 reply; 41+ messages in thread
From: Arjan van de Ven @ 2017-08-09 15:05 UTC (permalink / raw)
  To: Andi Kleen, H.J. Lu
  Cc: Michael Matz, Richard Biener, Richard Sandiford, GCC Patches,
	Jakub Jelinek, Alexander Monakov, Uros Bizjak

On 8/9/2017 8:04 AM, Andi Kleen wrote:
> I would add a new option
> -fforce-frame-pointer
> that gives the old -fno-omit-frame-pointer back, so that
> users relying on frame pointers everywhere have a workaround.

that function should also fix the current situation where the framepointer is not useful
because all actual code was moved to be outside the frame pointer code
(e.g.  push/mov/pop followed by the actual function code)

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 15:05                                               ` Arjan van de Ven
@ 2017-08-09 15:14                                                 ` H.J. Lu
  2017-08-09 15:26                                                   ` Andi Kleen
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-09 15:14 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Andi Kleen, Michael Matz, Richard Biener, Richard Sandiford,
	GCC Patches, Jakub Jelinek, Alexander Monakov, Uros Bizjak

On Wed, Aug 9, 2017 at 8:05 AM, Arjan van de Ven <arjan@linux.intel.com> wrote:
> On 8/9/2017 8:04 AM, Andi Kleen wrote:
>>
>> I would add a new option
>> -fforce-frame-pointer
>> that gives the old -fno-omit-frame-pointer back, so that
>> users relying on frame pointers everywhere have a workaround.

This must be much more specific.  How does it impact:

1. LTO
2. Function inlining.
3. Partial function inlining.
4. Shrink-wrapping.

Any of them can impact function stack frame.

>
> that function should also fix the current situation where the framepointer
> is not useful
> because all actual code was moved to be outside the frame pointer code
> (e.g.  push/mov/pop followed by the actual function code)
>



-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 15:14                                                 ` H.J. Lu
@ 2017-08-09 15:26                                                   ` Andi Kleen
  2017-08-09 17:28                                                     ` H.J. Lu
  0 siblings, 1 reply; 41+ messages in thread
From: Andi Kleen @ 2017-08-09 15:26 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Arjan van de Ven, Andi Kleen, Michael Matz, Richard Biener,
	Richard Sandiford, GCC Patches, Jakub Jelinek, Alexander Monakov,
	Uros Bizjak

> This must be much more specific.  How does it impact:
> 
> 1. LTO
> 2. Function inlining.
> 3. Partial function inlining.
> 4. Shrink-wrapping.
> 
> Any of them can impact function stack frame.

It doesn't. It's just to get back to the previous state.

Also these others already have explicit options to disable them.


-Andi

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 15:26                                                   ` Andi Kleen
@ 2017-08-09 17:28                                                     ` H.J. Lu
  2017-08-09 18:31                                                       ` H.J. Lu
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-09 17:28 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Arjan van de Ven, Michael Matz, Richard Biener,
	Richard Sandiford, GCC Patches, Jakub Jelinek, Alexander Monakov,
	Uros Bizjak

On Wed, Aug 9, 2017 at 8:26 AM, Andi Kleen <andi@firstfloor.org> wrote:
>> This must be much more specific.  How does it impact:
>>
>> 1. LTO
>> 2. Function inlining.
>> 3. Partial function inlining.
>> 4. Shrink-wrapping.
>>
>> Any of them can impact function stack frame.
>
> It doesn't. It's just to get back to the previous state.
>
> Also these others already have explicit options to disable them.
>

How about

item -fkeep-frame-pointer
@opindex fkeep-frame-pointer
Keep the frame pointer in a register for functions.  This option always
forces a new stack frame for all functions regardless of whether
@option{-fomit-frame-pointer} is enabled or not.  Disabled by default.


-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 17:28                                                     ` H.J. Lu
@ 2017-08-09 18:31                                                       ` H.J. Lu
  2017-08-10  7:19                                                         ` Richard Sandiford
  0 siblings, 1 reply; 41+ messages in thread
From: H.J. Lu @ 2017-08-09 18:31 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Arjan van de Ven, Michael Matz, Richard Biener,
	Richard Sandiford, GCC Patches, Jakub Jelinek, Alexander Monakov,
	Uros Bizjak

[-- Attachment #1: Type: text/plain, Size: 861 bytes --]

On Wed, Aug 9, 2017 at 10:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Aug 9, 2017 at 8:26 AM, Andi Kleen <andi@firstfloor.org> wrote:
>>> This must be much more specific.  How does it impact:
>>>
>>> 1. LTO
>>> 2. Function inlining.
>>> 3. Partial function inlining.
>>> 4. Shrink-wrapping.
>>>
>>> Any of them can impact function stack frame.
>>
>> It doesn't. It's just to get back to the previous state.
>>
>> Also these others already have explicit options to disable them.
>>
>
> How about
>
> item -fkeep-frame-pointer
> @opindex fkeep-frame-pointer
> Keep the frame pointer in a register for functions.  This option always
> forces a new stack frame for all functions regardless of whether
> @option{-fomit-frame-pointer} is enabled or not.  Disabled by default.
>

Here is the updated patch with -fkeep-frame-pointer.

OK for trunk?

-- 
H.J.

[-- Attachment #2: 0001-i386-Don-t-use-frame-pointer-without-stack-access.patch --]
[-- Type: text/x-patch, Size: 15245 bytes --]

From 6f5b42bbe2baa7977dcbf388219dabcb4a6b546d Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sun, 6 Aug 2017 06:24:36 -0700
Subject: [PATCH] i386: Don't use frame pointer without stack access

When there is no stack access, there is no need to use frame pointer
even if -fno-omit-frame-pointer is used and caller's frame pointer is
unchanged.  A command-line option, -fkeep-frame-pointer, is added to
always force a new stack frame.

gcc/

	PR target/81736
	* common.opt (-fomit-frame-pointer): Override
	-fkeep-frame-pointer.
	(-fkeep-frame-pointer): New command-line option.
	* config/i386/i386.c (ix86_finalize_stack_realign_flags): Renamed
	to ...
	(ix86_finalize_stack_frame_flags): This.  Also clear
	frame_pointer_needed if -fkeep-frame-pointer isn't used and
	,-fno-omit-frame-pointer is used without stack access.
	(ix86_expand_prologue): Replace ix86_finalize_stack_realign_flags
	with ix86_finalize_stack_frame_flags.
	(ix86_expand_epilogue): Likewise.
	(ix86_expand_split_stack_prologue): Likewise.
	* doc/invoke.texi: Document -fkeep-frame-pointer.  Update
	-fomit-frame-pointer.  Add a note for -fno-omit-frame-pointer.
	* toplev.c (process_options): Disable -fomit-frame-pointer if
	-fkeep-frame-pointer is used.

gcc/testsuite/

	PR target/81736
	* gcc.target/i386/pr81736-1a.c: New test.
	* gcc.target/i386/pr81736-1b.c: Likewise.
	* gcc.target/i386/pr81736-1c.c: Likewise.
	* gcc.target/i386/pr81736-1d.c: Likewise.
	* gcc.target/i386/pr81736-1e.c: Likewise.
	* gcc.target/i386/pr81736-1f.c: Likewise.
	* gcc.target/i386/pr81736-1g.c: Likewise.
	* gcc.target/i386/pr81736-2.c: Likewise.
	* gcc.target/i386/pr81736-3.c: Likewise.
	* gcc.target/i386/pr81736-4.c: Likewise.
	* gcc.target/i386/pr81736-5.c: Likewise.
	* gcc.target/i386/pr81736-6.c: Likewise.
	* gcc.target/i386/pr81736-7.c: Likewise.
---
 gcc/common.opt                             |  6 +++++-
 gcc/config/i386/i386.c                     | 24 +++++++++++++-----------
 gcc/doc/invoke.texi                        | 13 ++++++++++++-
 gcc/testsuite/gcc.target/i386/pr81736-1a.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-1b.c |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr81736-1c.c |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr81736-1d.c |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr81736-1e.c |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr81736-1f.c |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr81736-1g.c |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr81736-2.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-3.c  | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-4.c  | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-5.c  | 20 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-6.c  | 16 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr81736-7.c  | 13 +++++++++++++
 gcc/toplev.c                               |  4 ++++
 17 files changed, 174 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1d.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1e.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1f.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-1g.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr81736-7.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 1cb1c83d306..b73564d7145 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1881,9 +1881,13 @@ EnumValue
 Enum(offload_abi) String(lp64) Value(OFFLOAD_ABI_LP64)
 
 fomit-frame-pointer
-Common Report Var(flag_omit_frame_pointer) Optimization
+Common Report Var(flag_omit_frame_pointer) Negative(fkeep-frame-pointer) Optimization
 When possible do not generate stack frames.
 
+fkeep-frame-pointer
+Common Report Var(flag_keep_frame_pointer) Negative(fomit-frame-pointer)
+Always force a new stack frame
+
 fopt-info
 Common Report Var(flag_opt_info) Optimization
 Enable all optimization info dumps on stderr.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3f8519777f7..9c37e960dc8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14179,10 +14179,11 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
-/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
-   to be generated in correct form.  */
+/* Finalize stack_realign_needed and frame_pointer_needed flags, which
+   will guide prologue/epilogue to be generated in correct form.  */
+
 static void
-ix86_finalize_stack_realign_flags (void)
+ix86_finalize_stack_frame_flags (void)
 {
   /* Check if stack realign is really needed after reload, and
      stores result in cfun */
@@ -14205,13 +14206,14 @@ ix86_finalize_stack_realign_flags (void)
     }
 
   /* If the only reason for frame_pointer_needed is that we conservatively
-     assumed stack realignment might be needed, but in the end nothing that
-     needed the stack alignment had been spilled, clear frame_pointer_needed
-     and say we don't need stack realignment.  */
-  if (stack_realign
+     assumed stack realignment might be needed or -fno-omit-frame-pointer
+     is used, but in the end nothing that needed the stack alignment had
+     been spilled nor stack access, clear frame_pointer_needed and say we
+     don't need stack realignment.  */
+  if ((stack_realign || !flag_omit_frame_pointer)
+      && !flag_keep_frame_pointer
       && frame_pointer_needed
       && crtl->is_leaf
-      && flag_omit_frame_pointer
       && crtl->sp_is_unchanging
       && !ix86_current_function_calls_tls_descriptor
       && !crtl->accesses_prior_frames
@@ -14402,7 +14404,7 @@ ix86_expand_prologue (void)
   if (ix86_function_naked (current_function_decl))
     return;
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
 
   /* DRAP should not coexist with stack_realign_fp */
   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
@@ -15266,7 +15268,7 @@ ix86_expand_epilogue (int style)
       return;
     }
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = m->frame;
 
   m->fs.sp_realigned = stack_realign_fp;
@@ -15801,7 +15803,7 @@ ix86_expand_split_stack_prologue (void)
 
   gcc_assert (flag_split_stack && reload_completed);
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = cfun->machine->frame;
   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index cc0f5a00a4f..7a3b3d53f17 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -397,7 +397,7 @@ Objective-C and Objective-C++ Dialects}.
 -fno-peephole2  -fno-printf-return-value  -fno-sched-interblock @gol
 -fno-sched-spec  -fno-signed-zeros @gol
 -fno-toplevel-reorder  -fno-trapping-math  -fno-zero-initialized-in-bss @gol
--fomit-frame-pointer  -foptimize-sibling-calls @gol
+-fomit-frame-pointer -fkeep-frame-pointer -foptimize-sibling-calls @gol
 -fpartial-inlining  -fpeel-loops  -fpredictive-commoning @gol
 -fprefetch-loop-arrays @gol
 -fprofile-correction @gol
@@ -7365,9 +7365,20 @@ The default setting (when not optimizing for
 size) for 32-bit GNU/Linux x86 and 32-bit Darwin x86 targets is
 @option{-fomit-frame-pointer}.  You can configure GCC with the
 @option{--enable-frame-pointer} configure option to change the default.
+This option overrides @option{-fkeep-frame-pointer}.
+
+Note that @option{-fno-omit-frame-pointer} doesn't force a new stack
+frame for all functions if it isn't otherwise needed, and hence doesn't
+guarantee a new frame pointer for all functions.
 
 Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}.
 
+@item -fkeep-frame-pointer
+@opindex fkeep-frame-pointer
+Keep the frame pointer in a register for functions.  This option always
+forces a new stack frame for all functions.  This option overrides
+@option{-fomit-frame-pointer}.  Disabled by default.
+
 @item -foptimize-sibling-calls
 @opindex foptimize-sibling-calls
 Optimize sibling and tail recursive calls.
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1a.c b/gcc/testsuite/gcc.target/i386/pr81736-1a.c
new file mode 100644
index 00000000000..92c7bc97a0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1a.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int i;
+
+int
+foo (void)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1b.c b/gcc/testsuite/gcc.target/i386/pr81736-1b.c
new file mode 100644
index 00000000000..692ca9b7a30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fkeep-frame-pointer -fno-omit-frame-pointer" } */
+
+#include "pr81736-1a.c"
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1c.c b/gcc/testsuite/gcc.target/i386/pr81736-1c.c
new file mode 100644
index 00000000000..36ee6bc63f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -fno-omit-frame-pointer" } */
+
+#include "pr81736-1a.c"
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1d.c b/gcc/testsuite/gcc.target/i386/pr81736-1d.c
new file mode 100644
index 00000000000..143985852f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1d.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fkeep-frame-pointer -fomit-frame-pointer" } */
+
+#include "pr81736-1a.c"
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1e.c b/gcc/testsuite/gcc.target/i386/pr81736-1e.c
new file mode 100644
index 00000000000..fe98e96074c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1e.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -fkeep-frame-pointer" } */
+
+#include "pr81736-1a.c"
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1f.c b/gcc/testsuite/gcc.target/i386/pr81736-1f.c
new file mode 100644
index 00000000000..682cb3b4030
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1f.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fkeep-frame-pointer" } */
+
+#include "pr81736-1a.c"
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-1g.c b/gcc/testsuite/gcc.target/i386/pr81736-1g.c
new file mode 100644
index 00000000000..b6d19985c8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-1g.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -fkeep-frame-pointer" } */
+
+#include "pr81736-1a.c"
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-2.c b/gcc/testsuite/gcc.target/i386/pr81736-2.c
new file mode 100644
index 00000000000..a3720879937
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (int i)
+{
+  return i;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-3.c b/gcc/testsuite/gcc.target/i386/pr81736-3.c
new file mode 100644
index 00000000000..c3bde7dd933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+void
+foo (void)
+{
+  asm ("# " : : : "ebx");
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-4.c b/gcc/testsuite/gcc.target/i386/pr81736-4.c
new file mode 100644
index 00000000000..25f50016a64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+int
+foo (int i1, int i2, int i3, int i4, int i5, int i6, int i7)
+{
+  return i7;
+}
+
+/* Need to use a frame pointer.  */
+/* { dg-final { scan-assembler "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-5.c b/gcc/testsuite/gcc.target/i386/pr81736-5.c
new file mode 100644
index 00000000000..e1602cf25ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -mavx" } */
+
+typedef int v8si __attribute__ ((vector_size (32)));
+
+void
+#ifndef __x86_64__
+__attribute__((regparm(3)))
+#endif
+foo (v8si *out_start, v8si *out_end, v8si *regions)
+{
+  v8si base = regions[3];
+  *out_start = base;
+  *out_end = base;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
+/* Verify no dynamic realignment is performed.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-6.c b/gcc/testsuite/gcc.target/i386/pr81736-6.c
new file mode 100644
index 00000000000..6198574c8cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-6.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+struct foo
+{
+  int head;
+} a;
+
+int
+bar (void)
+{
+  return a.head != 0;
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr81736-7.c b/gcc/testsuite/gcc.target/i386/pr81736-7.c
new file mode 100644
index 00000000000..f947886e642
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr81736-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" } */
+
+extern int foo (void);
+
+int
+bar (void)
+{
+  return foo ();
+}
+
+/* No need to use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/toplev.c b/gcc/toplev.c
index d23714c4773..55069784bdd 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1297,6 +1297,10 @@ process_options (void)
 	   "-floop-parallelize-all)");
 #endif
 
+  /* Don't omit frame pointer if -fkeep-frame-pointer is used.  */
+  if (flag_keep_frame_pointer)
+    flag_omit_frame_pointer = 0;
+
   if (flag_check_pointer_bounds)
     {
       if (targetm.chkp_bound_mode () == VOIDmode)
-- 
2.13.4


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-09 18:31                                                       ` H.J. Lu
@ 2017-08-10  7:19                                                         ` Richard Sandiford
  2017-08-10  7:40                                                           ` Richard Sandiford
  2017-08-10  7:51                                                           ` Uros Bizjak
  0 siblings, 2 replies; 41+ messages in thread
From: Richard Sandiford @ 2017-08-10  7:19 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Andi Kleen, Arjan van de Ven, Michael Matz, Richard Biener,
	GCC Patches, Jakub Jelinek, Alexander Monakov, Uros Bizjak

"H.J. Lu" <hjl.tools@gmail.com> writes:
> On Wed, Aug 9, 2017 at 10:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Aug 9, 2017 at 8:26 AM, Andi Kleen <andi@firstfloor.org> wrote:
>>>> This must be much more specific.  How does it impact:
>>>>
>>>> 1. LTO
>>>> 2. Function inlining.
>>>> 3. Partial function inlining.
>>>> 4. Shrink-wrapping.
>>>>
>>>> Any of them can impact function stack frame.
>>>
>>> It doesn't. It's just to get back to the previous state.
>>>
>>> Also these others already have explicit options to disable them.
>>>
>>
>> How about
>>
>> item -fkeep-frame-pointer
>> @opindex fkeep-frame-pointer
>> Keep the frame pointer in a register for functions.  This option always
>> forces a new stack frame for all functions regardless of whether
>> @option{-fomit-frame-pointer} is enabled or not.  Disabled by default.
>>
>
> Here is the updated patch with -fkeep-frame-pointer.

It sounds like there's still some disagreement about what this option
should mean; Andi's and Arjan's replies didn't seem to be asking for
the same thing.

I think as implemented the patch just retains the GCC 7 x86 behaviour of
-fno-omit-frame-pointer, i.e. forces a frame to be created *somewhere*
between the start and end addresses of the function, but makes no
guarantee where.  It could be a bundle of three instructions somewhere
in the middle of a basic block, and the code might not be executed for
all paths through the function (e.g. it might only be executed on
error paths).

I think even if we think that's useful, it should be documented clearly.
Otherwise people might assume that a function f is guaranteed to set up
a frame every time it's called.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-10  7:19                                                         ` Richard Sandiford
@ 2017-08-10  7:40                                                           ` Richard Sandiford
  2017-08-10  7:51                                                           ` Uros Bizjak
  1 sibling, 0 replies; 41+ messages in thread
From: Richard Sandiford @ 2017-08-10  7:40 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Andi Kleen, Arjan van de Ven, Michael Matz, Richard Biener,
	GCC Patches, Jakub Jelinek, Alexander Monakov, Uros Bizjak

"H.J. Lu" <hjl.tools@gmail.com> writes:
> On Wed, Aug 9, 2017 at 10:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Aug 9, 2017 at 8:26 AM, Andi Kleen <andi@firstfloor.org> wrote:
>>>> This must be much more specific.  How does it impact:
>>>>
>>>> 1. LTO
>>>> 2. Function inlining.
>>>> 3. Partial function inlining.
>>>> 4. Shrink-wrapping.
>>>>
>>>> Any of them can impact function stack frame.
>>>
>>> It doesn't. It's just to get back to the previous state.
>>>
>>> Also these others already have explicit options to disable them.
>>>
>>
>> How about
>>
>> item -fkeep-frame-pointer
>> @opindex fkeep-frame-pointer
>> Keep the frame pointer in a register for functions.  This option always
>> forces a new stack frame for all functions regardless of whether
>> @option{-fomit-frame-pointer} is enabled or not.  Disabled by default.
>>
>
> Here is the updated patch with -fkeep-frame-pointer.

It sounds like there's still some disagreement about what this option
should mean; Andi's and Arjan's replies didn't seem to be asking for
the same thing.

I think as implemented the patch just retains the GCC 7 x86 behaviour of
-fno-omit-frame-pointer, i.e. forces a frame to be created *somewhere*
between the start and end addresses of the function, but makes no
guarantee where.  It could be a bundle of three instructions somewhere
in the middle of a basic block, and the code might not be executed for
all paths through the function (e.g. it might only be executed on
error paths).

I think even if we think that's useful, it should be documented clearly.
Otherwise people might assume that a function f is guaranteed to set up
a frame every time it's called.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-10  7:19                                                         ` Richard Sandiford
  2017-08-10  7:40                                                           ` Richard Sandiford
@ 2017-08-10  7:51                                                           ` Uros Bizjak
  2017-08-10 14:07                                                             ` H.J. Lu
  1 sibling, 1 reply; 41+ messages in thread
From: Uros Bizjak @ 2017-08-10  7:51 UTC (permalink / raw)
  To: H.J. Lu, Andi Kleen, Arjan van de Ven, Michael Matz,
	Richard Biener, GCC Patches, Jakub Jelinek, Alexander Monakov,
	Uros Bizjak, richard.sandiford

On Thu, Aug 10, 2017 at 9:09 AM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> "H.J. Lu" <hjl.tools@gmail.com> writes:
>> On Wed, Aug 9, 2017 at 10:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Wed, Aug 9, 2017 at 8:26 AM, Andi Kleen <andi@firstfloor.org> wrote:
>>>>> This must be much more specific.  How does it impact:
>>>>>
>>>>> 1. LTO
>>>>> 2. Function inlining.
>>>>> 3. Partial function inlining.
>>>>> 4. Shrink-wrapping.
>>>>>
>>>>> Any of them can impact function stack frame.
>>>>
>>>> It doesn't. It's just to get back to the previous state.
>>>>
>>>> Also these others already have explicit options to disable them.
>>>>
>>>
>>> How about
>>>
>>> item -fkeep-frame-pointer
>>> @opindex fkeep-frame-pointer
>>> Keep the frame pointer in a register for functions.  This option always
>>> forces a new stack frame for all functions regardless of whether
>>> @option{-fomit-frame-pointer} is enabled or not.  Disabled by default.
>>>
>>
>> Here is the updated patch with -fkeep-frame-pointer.
>
> It sounds like there's still some disagreement about what this option
> should mean; Andi's and Arjan's replies didn't seem to be asking for
> the same thing.
>
> I think as implemented the patch just retains the GCC 7 x86 behaviour of
> -fno-omit-frame-pointer, i.e. forces a frame to be created *somewhere*
> between the start and end addresses of the function, but makes no
> guarantee where.  It could be a bundle of three instructions somewhere
> in the middle of a basic block, and the code might not be executed for
> all paths through the function (e.g. it might only be executed on
> error paths).
>
> I think even if we think that's useful, it should be documented clearly.
> Otherwise people might assume that a function f is guaranteed to set up
> a frame every time it's called.

As said earlier, I think we should proceed with the previous patch
(that documents -fno-omit-frame-pointer limitations), It was
demonstrated that the patch does not make current situation worse.

-fkeep-frame-pointer is an orthogonal issue, and this option should
guarantee frame formation in *all* situations.This means that the
option should disable (partial) inlining, shrink-wrapping, etc.
Actually, it would disable so much optimizations, that its usefullnes
is questioned. OTOH, nobody ever complained about limited FP debugging
"experience" when mentioned optimizations were activated.

BTW: The option should be called -fforce-frame-pointer, but I really
doubt about its usefullnes.

Uros.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH] i386: Don't use frame pointer without stack access
  2017-08-10  7:51                                                           ` Uros Bizjak
@ 2017-08-10 14:07                                                             ` H.J. Lu
  0 siblings, 0 replies; 41+ messages in thread
From: H.J. Lu @ 2017-08-10 14:07 UTC (permalink / raw)
  To: Uros Bizjak
  Cc: Andi Kleen, Arjan van de Ven, Michael Matz, Richard Biener,
	GCC Patches, Jakub Jelinek, Alexander Monakov, Richard Sandiford

On Thu, Aug 10, 2017 at 12:39 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Aug 10, 2017 at 9:09 AM, Richard Sandiford
> <richard.sandiford@linaro.org> wrote:
>> "H.J. Lu" <hjl.tools@gmail.com> writes:
>>> On Wed, Aug 9, 2017 at 10:28 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Wed, Aug 9, 2017 at 8:26 AM, Andi Kleen <andi@firstfloor.org> wrote:
>>>>>> This must be much more specific.  How does it impact:
>>>>>>
>>>>>> 1. LTO
>>>>>> 2. Function inlining.
>>>>>> 3. Partial function inlining.
>>>>>> 4. Shrink-wrapping.
>>>>>>
>>>>>> Any of them can impact function stack frame.
>>>>>
>>>>> It doesn't. It's just to get back to the previous state.
>>>>>
>>>>> Also these others already have explicit options to disable them.
>>>>>
>>>>
>>>> How about
>>>>
>>>> item -fkeep-frame-pointer
>>>> @opindex fkeep-frame-pointer
>>>> Keep the frame pointer in a register for functions.  This option always
>>>> forces a new stack frame for all functions regardless of whether
>>>> @option{-fomit-frame-pointer} is enabled or not.  Disabled by default.
>>>>
>>>
>>> Here is the updated patch with -fkeep-frame-pointer.
>>
>> It sounds like there's still some disagreement about what this option
>> should mean; Andi's and Arjan's replies didn't seem to be asking for
>> the same thing.
>>
>> I think as implemented the patch just retains the GCC 7 x86 behaviour of
>> -fno-omit-frame-pointer, i.e. forces a frame to be created *somewhere*
>> between the start and end addresses of the function, but makes no
>> guarantee where.  It could be a bundle of three instructions somewhere
>> in the middle of a basic block, and the code might not be executed for
>> all paths through the function (e.g. it might only be executed on
>> error paths).
>>
>> I think even if we think that's useful, it should be documented clearly.
>> Otherwise people might assume that a function f is guaranteed to set up
>> a frame every time it's called.
>
> As said earlier, I think we should proceed with the previous patch
> (that documents -fno-omit-frame-pointer limitations), It was
> demonstrated that the patch does not make current situation worse.
>

I will check in my previous patch today.

Thanks.


-- 
H.J.

^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2017-08-10 13:42 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-06 19:40 [PATCH] i386: Don't use frame pointer without stack access H.J. Lu
2017-08-07  6:21 ` Uros Bizjak
2017-08-07 13:15   ` Michael Matz
2017-08-07 13:21     ` Uros Bizjak
2017-08-07 13:25       ` H.J. Lu
2017-08-07 13:32         ` Michael Matz
2017-08-07 13:38           ` H.J. Lu
2017-08-07 13:49             ` Michael Matz
2017-08-07 14:06               ` Alexander Monakov
2017-08-07 15:39                 ` H.J. Lu
2017-08-07 15:43                   ` Jakub Jelinek
2017-08-07 16:06                     ` Arjan van de Ven
2017-08-07 16:16                       ` Michael Matz
2017-08-07 16:19                         ` Arjan van de Ven
2017-08-07 16:21                           ` H.J. Lu
2017-08-07 16:28                           ` Michael Matz
2017-08-07 20:05                       ` Richard Sandiford
2017-08-08 16:38                         ` H.J. Lu
2017-08-08 17:01                           ` Richard Biener
2017-08-08 17:34                             ` Richard Sandiford
2017-08-08 17:36                               ` Richard Sandiford
2017-08-08 18:00                                 ` Richard Biener
2017-08-08 18:29                                   ` H.J. Lu
2017-08-09  7:53                                   ` Richard Sandiford
2017-08-09 11:22                                     ` Richard Biener
2017-08-09 11:31                                       ` H.J. Lu
2017-08-09 11:59                                         ` Michael Matz
2017-08-09 12:27                                           ` H.J. Lu
2017-08-09 15:04                                             ` Andi Kleen
2017-08-09 15:05                                               ` Arjan van de Ven
2017-08-09 15:14                                                 ` H.J. Lu
2017-08-09 15:26                                                   ` Andi Kleen
2017-08-09 17:28                                                     ` H.J. Lu
2017-08-09 18:31                                                       ` H.J. Lu
2017-08-10  7:19                                                         ` Richard Sandiford
2017-08-10  7:40                                                           ` Richard Sandiford
2017-08-10  7:51                                                           ` Uros Bizjak
2017-08-10 14:07                                                             ` H.J. Lu
2017-08-08 17:05                           ` Uros Bizjak
2017-08-07 18:40               ` Uros Bizjak
2017-08-07 13:38           ` Andreas Schwab

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).