public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] Fix PR64242
@ 2018-12-07 14:52 Wilco Dijkstra
  2018-12-07 15:55 ` Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Wilco Dijkstra @ 2018-12-07 14:52 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

Improve the fix for PR64242.  Various optimizations can change a memory reference
into a frame access.  Given there are multiple virtual frame pointers which may
be replaced by multiple hard frame pointers, there are no checks for writes to the
various frame pointers.  So updates to a frame pointer tends to generate incorrect
code.  Improve the previous fix to also add clobbers of several frame pointers and
add a scheduling barrier.  This should work in most cases until GCC supports a
generic "don't optimize across this instruction" feature.

Also improve testcase to handle alloca implementations which allocate too much,
and increase stack sizes to avoid accidentally passing the test like reported by
Jakub in comment #10 in the PR.

Bootstrap OK. Testcase passes on AArch64 and x86-64.  Inspected x86, Arm,
Thumb-1 and Thumb-2 assembler which looks correct.

ChangeLog:
2018-12-07  Wilco Dijkstra  <wdijkstr@arm.com>

gcc/
	PR middle-end/64242
	* builtins.c (expand_builtin_longjmp): Add frame clobbers and schedule block.
	(expand_builtin_nonlocal_goto): Likewise.

testsuite/
	PR middle-end/64242
	* gcc.c-torture/execute/pr64242.c: Update test.

---

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 669e548706f537fa9a92c5f47f30fc3c6ee38176..2ef9c9afcc69fcb775dc6a6fff550025bdc76337 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -1138,15 +1138,20 @@ expand_builtin_longjmp (rtx buf_addr, rtx value)
 	emit_insn (targetm.gen_nonlocal_goto (value, lab, stack, fp));
       else
 	{
-	  lab = copy_to_reg (lab);
-
 	  emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
 	  emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
 
+	  lab = copy_to_reg (lab);
+
 	  /* Restore the frame pointer and stack pointer.  We must use a
 	     temporary since the setjmp buffer may be a local.  */
 	  fp = copy_to_reg (fp);
 	  emit_stack_restore (SAVE_NONLOCAL, stack);
+
+	  /* Ensure the frame pointer move is not optimized.  */
+	  emit_insn (gen_blockage ());
+	  emit_clobber (hard_frame_pointer_rtx);
+	  emit_clobber (frame_pointer_rtx);
 	  emit_move_insn (hard_frame_pointer_rtx, fp);
 
 	  emit_use (hard_frame_pointer_rtx);
@@ -1285,15 +1290,20 @@ expand_builtin_nonlocal_goto (tree exp)
     emit_insn (targetm.gen_nonlocal_goto (const0_rtx, r_label, r_sp, r_fp));
   else
     {
-      r_label = copy_to_reg (r_label);
-
       emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
       emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
 
+      r_label = copy_to_reg (r_label);
+
       /* Restore the frame pointer and stack pointer.  We must use a
 	 temporary since the setjmp buffer may be a local.  */
       r_fp = copy_to_reg (r_fp);
       emit_stack_restore (SAVE_NONLOCAL, r_sp);
+
+      /* Ensure the frame pointer move is not optimized.  */
+      emit_insn (gen_blockage ());
+      emit_clobber (hard_frame_pointer_rtx);
+      emit_clobber (frame_pointer_rtx);
       emit_move_insn (hard_frame_pointer_rtx, r_fp);
 
       /* USE of hard_frame_pointer_rtx added for consistency;
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr64242.c b/gcc/testsuite/gcc.c-torture/execute/pr64242.c
index 46a7b23d28d71604d141281c21fb0b77849b1b0d..e6139ede3f34d587ac53d04e286e5d75fd2ca76c 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr64242.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr64242.c
@@ -5,46 +5,31 @@ extern void abort (void);
 __attribute ((noinline)) void
 broken_longjmp (void *p)
 {
-  void *buf[5];
+  void *buf[32];
   __builtin_memcpy (buf, p, 5 * sizeof (void*));
   /* Corrupts stack pointer...  */
   __builtin_longjmp (buf, 1);
 }
 
-__attribute ((noipa)) __UINTPTR_TYPE__
-foo (void *p)
-{
-  return (__UINTPTR_TYPE__) p;
-}
-
-__attribute ((noipa)) void
-bar (void *p)
-{
-  asm volatile ("" : : "r" (p));
-}
-
 volatile int x = 0;
-void *volatile p;
-void *volatile q;
+char *volatile p;
+char *volatile q;
 
 int
 main ()
 {
   void *buf[5];
-  struct __attribute__((aligned (32))) S { int a[4]; } s;
-  bar (&s);
   p = __builtin_alloca (x);
+  q = __builtin_alloca (x);
   if (!__builtin_setjmp (buf))
     broken_longjmp (buf);
 
+  /* Compute expected next alloca offset - some targets don't align properly
+     and allocate too much.  */
+  p = q + (q - p);
+
   /* Fails if stack pointer corrupted.  */
-  q = __builtin_alloca (x);
-  if (foo (p) < foo (q))
-    {
-      if (foo (q) - foo (p) >= 1024)
-	abort ();
-    }
-  else if (foo (p) - foo (q) >= 1024)
+  if (p != __builtin_alloca (x))
     abort ();
 
   return 0;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Fix PR64242
  2018-12-07 14:52 [PATCH v2] Fix PR64242 Wilco Dijkstra
@ 2018-12-07 15:55 ` Jakub Jelinek
  2018-12-07 16:19   ` Wilco Dijkstra
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2018-12-07 15:55 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches, nd

On Fri, Dec 07, 2018 at 02:52:48PM +0000, Wilco Dijkstra wrote:
> -  struct __attribute__((aligned (32))) S { int a[4]; } s;                                                                                         
> -  bar (&s);                                                                                                                                       

Any reason to remove the above?

>    p = __builtin_alloca (x);
> +  q = __builtin_alloca (x);
>    if (!__builtin_setjmp (buf))
>      broken_longjmp (buf);
>  
> +  /* Compute expected next alloca offset - some targets don't align properly
> +     and allocate too much.  */
> +  p = q + (q - p);

This is UB, pointer difference is only defined within the same object.
So, you can only do such subtraction in some integral type rather than as
pointer subtraction. 

> +
>    /* Fails if stack pointer corrupted.  */
> -  q = __builtin_alloca (x);
> -  if (foo (p) < foo (q))
> -    {
> -      if (foo (q) - foo (p) >= 1024)
> -	abort ();
> -    }
> -  else if (foo (p) - foo (q) >= 1024)
> +  if (p != __builtin_alloca (x))

And I'm not sure you have a guarantee that every zero sized alloca is at the
same offset from the previous one.

	Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Fix PR64242
  2018-12-07 15:55 ` Jakub Jelinek
@ 2018-12-07 16:19   ` Wilco Dijkstra
  2018-12-07 16:23     ` Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Wilco Dijkstra @ 2018-12-07 16:19 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: GCC Patches, nd

Hi,

Jakub Jelinek wrote:
> On Fri, Dec 07, 2018 at 02:52:48PM +0000, Wilco Dijkstra wrote:
>> -  struct __attribute__((aligned (32))) S { int a[4]; } s;                                                                                        
>> -  bar (&s);                                                                                                                                      
>
> Any reason to remove the above?

The test case doesn't need an aligned object to fail, so why did you add it?

>> +  /* Compute expected next alloca offset - some targets don't align properly
>> +     and allocate too much.  */
>> +  p = q + (q - p);
>
> This is UB, pointer difference is only defined within the same object.
> So, you can only do such subtraction in some integral type rather than as
> pointer subtraction. 

__builtin_setjmp is already undefined behaviour, and the stack corruption is
even more undefined - trying to avoid harmless theoretical undefined behaviour
wouldn't be helpful.

> And I'm not sure you have a guarantee that every zero sized alloca is at the
> same offset from the previous one.

The above pointer adjustment handles the case where alloca overallocates.
It passes on x86-64 which always adds 8 unnecessary bytes.

Wilco

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Fix PR64242
  2018-12-07 16:19   ` Wilco Dijkstra
@ 2018-12-07 16:23     ` Jakub Jelinek
  2018-12-07 16:49       ` Wilco Dijkstra
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2018-12-07 16:23 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches, nd

On Fri, Dec 07, 2018 at 04:19:22PM +0000, Wilco Dijkstra wrote:
> Jakub Jelinek wrote:
> > On Fri, Dec 07, 2018 at 02:52:48PM +0000, Wilco Dijkstra wrote:
> >> -  struct __attribute__((aligned (32))) S { int a[4]; } s;                                                                                        
> >> -  bar (&s);                                                                                                                                      
> >
> > Any reason to remove the above?
> 
> The test case doesn't need an aligned object to fail, so why did you add it?

It needed it on i686, because otherwise it happened to see the value it
wanted in the caller's stack frame.

> >> +  /* Compute expected next alloca offset - some targets don't align properly
> >> +     and allocate too much.  */
> >> +  p = q + (q - p);
> >
> > This is UB, pointer difference is only defined within the same object.
> > So, you can only do such subtraction in some integral type rather than as
> > pointer subtraction. 
> 
> __builtin_setjmp is already undefined behaviour, and the stack corruption is
> even more undefined - trying to avoid harmless theoretical undefined behaviour
> wouldn't be helpful.

No, __builtin_setjmp is a GNU extension, not undefined behavior.  And
something that is UB and might be harmless today might be harmful tomorrow,
gcc optimizes heavily on the assumption that UB doesn't happen in the
program, so might optimize that subtraction to 0 or 42 or whatever else.

> 
> > And I'm not sure you have a guarantee that every zero sized alloca is at the
> > same offset from the previous one.
> 
> The above pointer adjustment handles the case where alloca overallocates.
> It passes on x86-64 which always adds 8 unnecessary bytes.

What guarantee is there that it overallocates each time the same though?

	Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Fix PR64242
  2018-12-07 16:23     ` Jakub Jelinek
@ 2018-12-07 16:49       ` Wilco Dijkstra
  2019-01-10 13:06         ` Wilco Dijkstra
  0 siblings, 1 reply; 8+ messages in thread
From: Wilco Dijkstra @ 2018-12-07 16:49 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: GCC Patches, nd

Hi,

Jakub Jelinek wrote:
On Fri, Dec 07, 2018 at 04:19:22PM +0000, Wilco Dijkstra wrote:

>> The test case doesn't need an aligned object to fail, so why did you add it?
>
> It needed it on i686, because otherwise it happened to see the value it
> wanted in the caller's stack frame.

Right, so I fixed that by increasing the size of the frame in broken_setjmp to be
larger than the frame in main, so it's now extremely unlikely to accidentally read
from a random stack location and end up with a valid stack pointer.

> >> +  /* Compute expected next alloca offset - some targets don't align properly
> >> +     and allocate too much.  */
> >> +  p = q + (q - p);
> >
> > This is UB, pointer difference is only defined within the same object.
> > So, you can only do such subtraction in some integral type rather than as
> > pointer subtraction. 
> 
> __builtin_setjmp is already undefined behaviour, and the stack corruption is
> even more undefined - trying to avoid harmless theoretical undefined behaviour
> wouldn't be helpful.

> No, __builtin_setjmp is a GNU extension, not undefined behavior.  

Well the evidence is that it's undocumented, unspecified and causes undefined
behaviour...

> And 
> something that is UB and might be harmless today might be harmful tomorrow,
> gcc optimizes heavily on the assumption that UB doesn't happen in the
> program, so might optimize that subtraction to 0 or 42 or whatever else.
>
>> > And I'm not sure you have a guarantee that every zero sized alloca is at the
>> > same offset from the previous one.
>> 
>> The above pointer adjustment handles the case where alloca overallocates.
>> It passes on x86-64 which always adds 8 unnecessary bytes.
>
> What guarantee is there that it overallocates each time the same though?

How could it not be? It could only vary if it was reading an uninitialized register or
adding a random extra amount as a form of ASLR. But there is no point in trying
to support future unknown features/bugs since it will give false negatives today.

Wilco
    

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Fix PR64242
  2018-12-07 16:49       ` Wilco Dijkstra
@ 2019-01-10 13:06         ` Wilco Dijkstra
  0 siblings, 0 replies; 8+ messages in thread
From: Wilco Dijkstra @ 2019-01-10 13:06 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: GCC Patches, nd

Hi Jakub,

Any other comments? I'd like to finish this rather than leaving it in its current
half-done state.

Wilco
  

Hi,

Jakub Jelinek wrote:
On Fri, Dec 07, 2018 at 04:19:22PM +0000, Wilco Dijkstra wrote:

>> The test case doesn't need an aligned object to fail, so why did you add it?
>
> It needed it on i686, because otherwise it happened to see the value it
> wanted in the caller's stack frame.

Right, so I fixed that by increasing the size of the frame in broken_setjmp to be
larger than the frame in main, so it's now extremely unlikely to accidentally read
from a random stack location and end up with a valid stack pointer.

> >> +  /* Compute expected next alloca offset - some targets don't align properly
> >> +     and allocate too much.  */
> >> +  p = q + (q - p);
> >
> > This is UB, pointer difference is only defined within the same object.
> > So, you can only do such subtraction in some integral type rather than as
> > pointer subtraction. 
> 
> __builtin_setjmp is already undefined behaviour, and the stack corruption is
> even more undefined - trying to avoid harmless theoretical undefined behaviour
> wouldn't be helpful.

> No, __builtin_setjmp is a GNU extension, not undefined behavior.  

Well the evidence is that it's undocumented, unspecified and causes undefined
behaviour...

> And 
> something that is UB and might be harmless today might be harmful tomorrow,
> gcc optimizes heavily on the assumption that UB doesn't happen in the
> program, so might optimize that subtraction to 0 or 42 or whatever else.
>
>> > And I'm not sure you have a guarantee that every zero sized alloca is at the
>> > same offset from the previous one.
>> 
>> The above pointer adjustment handles the case where alloca overallocates.
>> It passes on x86-64 which always adds 8 unnecessary bytes.
>
> What guarantee is there that it overallocates each time the same though?

How could it not be? It could only vary if it was reading an uninitialized register or
adding a random extra amount as a form of ASLR. But there is no point in trying
to support future unknown features/bugs since it will give false negatives today.

Wilco
        

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Fix PR64242
  2019-05-28 17:33 Wilco Dijkstra
@ 2019-05-31 21:41 ` Jeff Law
  0 siblings, 0 replies; 8+ messages in thread
From: Jeff Law @ 2019-05-31 21:41 UTC (permalink / raw)
  To: Wilco Dijkstra, GCC Patches; +Cc: nd

On 5/28/19 11:26 AM, Wilco Dijkstra wrote:
> Improve the fix for PR64242.  Various optimizations can change a memory reference
> into a frame access.  Given there are multiple virtual frame pointers which may
> be replaced by multiple hard frame pointers, there are no checks for writes to the
> various frame pointers.  So updates to a frame pointer tends to generate incorrect
> code.  Improve the previous fix to also add clobbers of several frame pointers and
> add a scheduling barrier.  This should work in most cases until GCC supports a
> generic "don't optimize across this instruction" feature.
> 
> Bootstrap OK. Testcase passes on AArch64 and x86-64.  Inspected x86, Arm,
> Thumb-1 and Thumb-2 assembler which looks correct. 
> 
> ChangeLog:
> 2018-12-07  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> gcc/
> 	PR middle-end/64242
> 	* builtins.c (expand_builtin_longjmp): Add frame clobbers and schedule block.
> 	(expand_builtin_nonlocal_goto): Likewise.
> 
> testsuite/
> 	PR middle-end/64242
> 	* gcc.c-torture/execute/pr64242.c: Update test.
OK.  Though given history we might expect some targets to barf on the
test changes.  Please keep an eye out for such breakage.

jeff

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] Fix PR64242
@ 2019-05-28 17:33 Wilco Dijkstra
  2019-05-31 21:41 ` Jeff Law
  0 siblings, 1 reply; 8+ messages in thread
From: Wilco Dijkstra @ 2019-05-28 17:33 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

Improve the fix for PR64242.  Various optimizations can change a memory reference
into a frame access.  Given there are multiple virtual frame pointers which may
be replaced by multiple hard frame pointers, there are no checks for writes to the
various frame pointers.  So updates to a frame pointer tends to generate incorrect
code.  Improve the previous fix to also add clobbers of several frame pointers and
add a scheduling barrier.  This should work in most cases until GCC supports a
generic "don't optimize across this instruction" feature.

Bootstrap OK. Testcase passes on AArch64 and x86-64.  Inspected x86, Arm,
Thumb-1 and Thumb-2 assembler which looks correct. 

ChangeLog:
2018-12-07  Wilco Dijkstra  <wdijkstr@arm.com>

gcc/
	PR middle-end/64242
	* builtins.c (expand_builtin_longjmp): Add frame clobbers and schedule block.
	(expand_builtin_nonlocal_goto): Likewise.

testsuite/
	PR middle-end/64242
	* gcc.c-torture/execute/pr64242.c: Update test.

--
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 3f32754c4d35fc34af7c53156d2a356f69a94a8f..3463ffb153914a58e5baa3896a244842a28eef09 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -1137,15 +1137,20 @@ expand_builtin_longjmp (rtx buf_addr, rtx value)
 	emit_insn (targetm.gen_nonlocal_goto (value, lab, stack, fp));
       else
 	{
-	  lab = copy_to_reg (lab);
-
 	  emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
 	  emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
 
+	  lab = copy_to_reg (lab);
+
 	  /* Restore the frame pointer and stack pointer.  We must use a
 	     temporary since the setjmp buffer may be a local.  */
 	  fp = copy_to_reg (fp);
 	  emit_stack_restore (SAVE_NONLOCAL, stack);
+
+	  /* Ensure the frame pointer move is not optimized.  */
+	  emit_insn (gen_blockage ());
+	  emit_clobber (hard_frame_pointer_rtx);
+	  emit_clobber (frame_pointer_rtx);
 	  emit_move_insn (hard_frame_pointer_rtx, fp);
 
 	  emit_use (hard_frame_pointer_rtx);
@@ -1284,15 +1289,20 @@ expand_builtin_nonlocal_goto (tree exp)
     emit_insn (targetm.gen_nonlocal_goto (const0_rtx, r_label, r_sp, r_fp));
   else
     {
-      r_label = copy_to_reg (r_label);
-
       emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
       emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
 
+      r_label = copy_to_reg (r_label);
+
       /* Restore the frame pointer and stack pointer.  We must use a
 	 temporary since the setjmp buffer may be a local.  */
       r_fp = copy_to_reg (r_fp);
       emit_stack_restore (SAVE_NONLOCAL, r_sp);
+
+      /* Ensure the frame pointer move is not optimized.  */
+      emit_insn (gen_blockage ());
+      emit_clobber (hard_frame_pointer_rtx);
+      emit_clobber (frame_pointer_rtx);
       emit_move_insn (hard_frame_pointer_rtx, r_fp);
 
       /* USE of hard_frame_pointer_rtx added for consistency;
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr64242.c b/gcc/testsuite/gcc.c-torture/execute/pr64242.c
index 46a7b23d28d71604d141281c21fb0b77849b1b0d..e6139ede3f34d587ac53d04e286e5d75fd2ca76c 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr64242.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr64242.c
@@ -5,46 +5,31 @@ extern void abort (void);
 __attribute ((noinline)) void
 broken_longjmp (void *p)
 {
-  void *buf[5];
+  void *buf[32];
   __builtin_memcpy (buf, p, 5 * sizeof (void*));
   /* Corrupts stack pointer...  */
   __builtin_longjmp (buf, 1);
 }
 
-__attribute ((noipa)) __UINTPTR_TYPE__
-foo (void *p)
-{
-  return (__UINTPTR_TYPE__) p;
-}
-
-__attribute ((noipa)) void
-bar (void *p)
-{
-  asm volatile ("" : : "r" (p));
-}
-
 volatile int x = 0;
-void *volatile p;
-void *volatile q;
+char *volatile p;
+char *volatile q;
 
 int
 main ()
 {
   void *buf[5];
-  struct __attribute__((aligned (32))) S { int a[4]; } s;
-  bar (&s);
   p = __builtin_alloca (x);
+  q = __builtin_alloca (x);
   if (!__builtin_setjmp (buf))
     broken_longjmp (buf);
 
+  /* Compute expected next alloca offset - some targets don't align properly
+     and allocate too much.  */
+  p = q + (q - p);
+
   /* Fails if stack pointer corrupted.  */
-  q = __builtin_alloca (x);
-  if (foo (p) < foo (q))
-    {
-      if (foo (q) - foo (p) >= 1024)
-	abort ();
-    }
-  else if (foo (p) - foo (q) >= 1024)
+  if (p != __builtin_alloca (x))
     abort ();
 
   return 0;

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-05-31 21:22 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-07 14:52 [PATCH v2] Fix PR64242 Wilco Dijkstra
2018-12-07 15:55 ` Jakub Jelinek
2018-12-07 16:19   ` Wilco Dijkstra
2018-12-07 16:23     ` Jakub Jelinek
2018-12-07 16:49       ` Wilco Dijkstra
2019-01-10 13:06         ` Wilco Dijkstra
2019-05-28 17:33 Wilco Dijkstra
2019-05-31 21:41 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).