public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][AArch64] Simplify eh_return implementation
@ 2016-08-04 11:21 Wilco Dijkstra
  2016-08-10 16:26 ` [PATCH][AArch64 - v2] " Wilco Dijkstra
  0 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2016-08-04 11:21 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-04  Wilco Dijkstra  <wdijkstr@arm.com>

gcc/
	* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
	* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
	(EH_RETURN_HANDLER_RTX): New define.
	* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
	Force frame pointer in EH return functions.
	(aarch64_final_eh_return_addr): Remove.
	(aarch64_eh_return_handler_rtx): New function.
	* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
	Remove.
	(aarch64_eh_return_handler_rtx): New prototype.

--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)	\
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM	(R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 405a75914ceed81c6bbbe1384fce815f6d673d6c..f951323586742a6313b5bb345252ce4b4d10debd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2718,6 +2718,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3348,52 +3352,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-	      - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-			plus_constant (Pmode,
-				       stack_pointer_rtx,
-				       fp_offset
-				       + cfun->machine->frame.saved_regs_size
-				       - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
 				(const_int 0))

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v2] Simplify eh_return implementation
  2016-08-04 11:21 [PATCH][AArch64] Simplify eh_return implementation Wilco Dijkstra
@ 2016-08-10 16:26 ` Wilco Dijkstra
  2016-08-23 14:48   ` Wilco Dijkstra
  2016-09-01 13:40   ` Ramana Radhakrishnan
  0 siblings, 2 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-08-10 16:26 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

I noticed it would still be a good idea to add an extra barrier in the epilog as the
scheduler doesn't appear to handle aliases of frame accesses properly.

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-10  Wilco Dijkstra  <wdijkstr@arm.com>
gcc/
	* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
	* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
	(EH_RETURN_HANDLER_RTX): New define.
	* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
	Force frame pointer in EH return functions.
	(aarch64_expand_epilogue): Add barrier for eh_return.
	(aarch64_final_eh_return_addr): Remove.
	(aarch64_eh_return_handler_rtx): New function.
	* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
	Remove.
	(aarch64_eh_return_handler_rtx): New prototype.
--

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)	\
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM	(R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
 			 + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-	      - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-			plus_constant (Pmode,
-				       stack_pointer_rtx,
-				       fp_offset
-				       + cfun->machine->frame.saved_regs_size
-				       - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
 				(const_int 0))

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v2] Simplify eh_return implementation
  2016-08-10 16:26 ` [PATCH][AArch64 - v2] " Wilco Dijkstra
@ 2016-08-23 14:48   ` Wilco Dijkstra
  2016-08-26 15:03     ` Jiong Wang
  2016-09-01 12:33     ` Wilco Dijkstra
  2016-09-01 13:40   ` Ramana Radhakrishnan
  1 sibling, 2 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-08-23 14:48 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

Ping

I noticed it would still be a good idea to add an extra barrier in the epilog as the
scheduler doesn't appear to handle aliases of frame accesses properly.

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-10  Wilco Dijkstra  <wdijkstr@arm.com>
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.
--

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v2] Simplify eh_return implementation
  2016-08-23 14:48   ` Wilco Dijkstra
@ 2016-08-26 15:03     ` Jiong Wang
  2016-08-26 18:47       ` Wilco Dijkstra
  2016-09-01 12:33     ` Wilco Dijkstra
  1 sibling, 1 reply; 21+ messages in thread
From: Jiong Wang @ 2016-08-26 15:03 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches, nd


Wilco Dijkstra writes:

> Ping
>
> I noticed it would still be a good idea to add an extra barrier in the epilog as the
> scheduler doesn't appear to handle aliases of frame accesses properly.
>
> This patch simplifies the handling of the EH return value.  We force the use of the
> frame pointer so the return location is always at FP + 8.  This means we can emit
> a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
> patterns, splitters and frame offset calculations.  The new implementation also
> fixes various bugs in aarch64_final_eh_return_addr, which does not work with
> -fomit-frame-pointer, alloca or outgoing arguments.

The -fomit-frame-pointer is really broken on aarch64_find_eh_return_addr

-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));

the saved_regs_size includes both general and vector register saving
area, while LR should be saved on top of general register
area. Meanwhile saved_regs_size contains alignment amount.

Given EH unwind code will invoke __builtin_unwind_init which pushes all
callee-saved, both general and vector, the current function will always
get wrong offset.

I think the correct offset when -fomit-frame-pointer should be:

  "cfun->machine->frame.reg_offset[LR_REGNUM]"

I have done a quick check on _Unwind_RaiseException which is the only
code affected by this change.  Without frame pointer, the exception
handler's address is installed in different, thus wrong, stack slot.

...
str     x30, [sp, 112]
...
str     x19, [sp, 176]

This approach used in this patch looks good to me.

> 2016-08-10  Wilco Dijkstra  <wdijkstr@arm.com>
> gcc/
>         * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
>         * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
>         (EH_RETURN_HANDLER_RTX): New define.
>         * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
>         Force frame pointer in EH return functions.
>         (aarch64_expand_epilogue): Add barrier for eh_return.
>         (aarch64_final_eh_return_addr): Remove.
>         (aarch64_eh_return_handler_rtx): New function.
>         * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
>         Remove.
>         (aarch64_eh_return_handler_rtx): New prototype.

-- 
Regards,
Jiong

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v2] Simplify eh_return implementation
  2016-08-26 15:03     ` Jiong Wang
@ 2016-08-26 18:47       ` Wilco Dijkstra
  0 siblings, 0 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-08-26 18:47 UTC (permalink / raw)
  To: Jiong Wang; +Cc: GCC Patches, nd

Jiong Wang <jiong.wang@foss.arm.com> wrote:
> The -fomit-frame-pointer is really broken on aarch64_find_eh_return_addr

Yes, that's a good conclusion. However even with the frame pointer there are 
cases that fail, for example it will access LR off SP even after alloca. In fact
we're lucky it works at all sometimes...

> I think the correct offset when -fomit-frame-pointer should be:
>
>  "cfun->machine->frame.reg_offset[LR_REGNUM]"

With the offset to the register save area added of course. But then you still
need to expand late and so need the patterns in the MD. By forcing a frame
pointer we avoid all that complexity.

Wilco

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v2] Simplify eh_return implementation
  2016-08-23 14:48   ` Wilco Dijkstra
  2016-08-26 15:03     ` Jiong Wang
@ 2016-09-01 12:33     ` Wilco Dijkstra
  1 sibling, 0 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-09-01 12:33 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

 
Ping

I noticed it would still be a good idea to add an extra barrier in the epilog as the
scheduler doesn't appear to handle aliases of frame accesses properly.

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-10  Wilco Dijkstra  <wdijkstr@arm.com>
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.
--

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v2] Simplify eh_return implementation
  2016-08-10 16:26 ` [PATCH][AArch64 - v2] " Wilco Dijkstra
  2016-08-23 14:48   ` Wilco Dijkstra
@ 2016-09-01 13:40   ` Ramana Radhakrishnan
  2016-09-02 11:31     ` [PATCH][AArch64 - v3] " Wilco Dijkstra
  1 sibling, 1 reply; 21+ messages in thread
From: Ramana Radhakrishnan @ 2016-09-01 13:40 UTC (permalink / raw)
  To: Wilco Dijkstra, GCC Patches; +Cc: nd



On 10/08/16 17:26, Wilco Dijkstra wrote:
> I noticed it would still be a good idea to add an extra barrier in the epilog as the
> scheduler doesn't appear to handle aliases of frame accesses properly.
> 
> This patch simplifies the handling of the EH return value.  We force the use of the
> frame pointer so the return location is always at FP + 8.  This means we can emit
> a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
> patterns, splitters and frame offset calculations.  The new implementation also
> fixes various bugs in aarch64_final_eh_return_addr, which does not work with
> -fomit-frame-pointer, alloca or outgoing arguments.
> 
> Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
> this to GCC6.x?


Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.



Ramana
> 
> ChangeLog:
> 2016-08-10  Wilco Dijkstra  <wdijkstr@arm.com>
> gcc/
> 	* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
> 	* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
> 	(EH_RETURN_HANDLER_RTX): New define.
> 	* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
> 	Force frame pointer in EH return functions.
> 	(aarch64_expand_epilogue): Add barrier for eh_return.
> 	(aarch64_final_eh_return_addr): Remove.
> 	(aarch64_eh_return_handler_rtx): New function.
> 	* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
> 	Remove.
> 	(aarch64_eh_return_handler_rtx): New prototype.
> --
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
>  int aarch64_hard_regno_nregs (unsigned, machine_mode);
>  int aarch64_uxt_size (int, HOST_WIDE_INT);
>  int aarch64_vec_fpconst_pow_of_2 (rtx);
> -rtx aarch64_final_eh_return_addr (void);
> +rtx aarch64_eh_return_handler_rtx (void);
>  rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>  const char *aarch64_output_move_struct (rtx *operands);
>  rtx aarch64_return_addr (int, rtx);
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
>  #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)	\
>    aarch64_declare_function_name (STR, NAME, DECL)
>  
> -/* The register that holds the return address in exception handlers.  */
> -#define AARCH64_EH_STACKADJ_REGNUM	(R0_REGNUM + 4)
> -#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
> +/* For EH returns X4 contains the stack adjustment.  */
> +#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
> +#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
>  
>  /* Don't use __builtin_setjmp until we've defined it.  */
>  #undef DONT_USE_BUILTIN_SETJMP
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
>        && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
>      return true;
>  
> +  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
> +  if (crtl->calls_eh_return)
> +    return true;
> +
>    return false;
>  }
>  
> @@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
>  			 + cfun->machine->frame.saved_varargs_size) != 0;
>  
>    /* Emit a barrier to prevent loads from a deallocated stack.  */
> -  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
> +  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
> +      || crtl->calls_eh_return)
>      {
>        emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
>        need_barrier_p = false;
> @@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
>      emit_jump_insn (ret_rtx);
>  }
>  
> -/* Return the place to copy the exception unwinding return address to.
> -   This will probably be a stack slot, but could (in theory be the
> -   return register).  */
> +/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
> +   The access needs to be volatile to prevent it from being removed.  */
>  rtx
> -aarch64_final_eh_return_addr (void)
> +aarch64_eh_return_handler_rtx (void)
>  {
> -  HOST_WIDE_INT fp_offset;
> -
> -  aarch64_layout_frame ();
> -
> -  fp_offset = cfun->machine->frame.frame_size
> -	      - cfun->machine->frame.hard_fp_offset;
> -
> -  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
> -    return gen_rtx_REG (DImode, LR_REGNUM);
> -
> -  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
> -     result in a store to save LR introduced by builtin_eh_return () being
> -     incorrectly deleted because the alias is not detected.
> -     So in the calculation of the address to copy the exception unwinding
> -     return address to, we note 2 cases.
> -     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
> -     we return a SP-relative location since all the addresses are SP-relative
> -     in this case.  This prevents the store from being optimized away.
> -     If the fp_offset is not 0, then the addresses will be FP-relative and
> -     therefore we return a FP-relative location.  */
> -
> -  if (frame_pointer_needed)
> -    {
> -      if (fp_offset)
> -        return gen_frame_mem (DImode,
> -			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
> -      else
> -        return gen_frame_mem (DImode,
> -			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
> -    }
> -
> -  /* If FP is not needed, we calculate the location of LR, which would be
> -     at the top of the saved registers block.  */
> -
> -  return gen_frame_mem (DImode,
> -			plus_constant (Pmode,
> -				       stack_pointer_rtx,
> -				       fp_offset
> -				       + cfun->machine->frame.saved_regs_size
> -				       - 2 * UNITS_PER_WORD));
> +  rtx tmp = gen_frame_mem (Pmode,
> +    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
> +  MEM_VOLATILE_P (tmp) = true;
> +  return tmp;
>  }
>  
>  /* Output code to add DELTA to the first argument, and then jump
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -591,25 +591,6 @@
>    [(set_attr "type" "branch")]
>  )
>  
> -(define_insn "eh_return"
> -  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
> -    UNSPECV_EH_RETURN)]
> -  ""
> -  "#"
> -  [(set_attr "type" "branch")]
> -
> -)
> -
> -(define_split
> -  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
> -    UNSPECV_EH_RETURN)]
> -  "reload_completed"
> -  [(set (match_dup 1) (match_dup 0))]
> -  {
> -    operands[1] = aarch64_final_eh_return_addr ();
> -  }
> -)
> -
>  (define_insn "*cb<optab><mode>1"
>    [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
>  				(const_int 0))
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-09-01 13:40   ` Ramana Radhakrishnan
@ 2016-09-02 11:31     ` Wilco Dijkstra
  2016-09-12 14:51       ` Wilco Dijkstra
                         ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-09-02 11:31 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: nd

Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

	PR77455
gcc/
	* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
	* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
	(EH_RETURN_HANDLER_RTX): New define.
	* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
	Force frame pointer in EH return functions.
	(aarch64_expand_epilogue): Add barrier for eh_return.
	(aarch64_final_eh_return_addr): Remove.
	(aarch64_eh_return_handler_rtx): New function.
	* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
	Remove.
	(aarch64_eh_return_handler_rtx): New prototype.

testsuite/
	* gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)	\
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM	(R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
 			 + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-	      - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-			plus_constant (Pmode,
-				       stack_pointer_rtx,
-				       fp_offset
-				       + cfun->machine->frame.saved_regs_size
-				       - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
 				(const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-09-02 11:31     ` [PATCH][AArch64 - v3] " Wilco Dijkstra
@ 2016-09-12 14:51       ` Wilco Dijkstra
  2016-09-21 14:50       ` Wilco Dijkstra
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-09-12 14:51 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: nd


ping
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-09-02 11:31     ` [PATCH][AArch64 - v3] " Wilco Dijkstra
  2016-09-12 14:51       ` Wilco Dijkstra
@ 2016-09-21 14:50       ` Wilco Dijkstra
  2016-10-17 12:41       ` Wilco Dijkstra
  2017-01-13 17:54       ` James Greenhalgh
  3 siblings, 0 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-09-21 14:50 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: nd


ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-09-02 11:31     ` [PATCH][AArch64 - v3] " Wilco Dijkstra
  2016-09-12 14:51       ` Wilco Dijkstra
  2016-09-21 14:50       ` Wilco Dijkstra
@ 2016-10-17 12:41       ` Wilco Dijkstra
  2016-10-25  9:48         ` Wilco Dijkstra
  2017-01-13 17:54       ` James Greenhalgh
  3 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2016-10-17 12:41 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: nd



ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-10-17 12:41       ` Wilco Dijkstra
@ 2016-10-25  9:48         ` Wilco Dijkstra
  2016-11-02 16:49           ` Wilco Dijkstra
  0 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2016-10-25  9:48 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: nd


ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}   

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-10-25  9:48         ` Wilco Dijkstra
@ 2016-11-02 16:49           ` Wilco Dijkstra
  2016-11-14 13:05             ` Wilco Dijkstra
  0 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2016-11-02 16:49 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: nd


    

ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}            

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-11-02 16:49           ` Wilco Dijkstra
@ 2016-11-14 13:05             ` Wilco Dijkstra
  2016-12-06 15:27               ` Wilco Dijkstra
  0 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2016-11-14 13:05 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches; +Cc: nd

ping

From: Wilco Dijkstra
Sent: 02 November 2016 16:49
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    

    

ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}                

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-11-14 13:05             ` Wilco Dijkstra
@ 2016-12-06 15:27               ` Wilco Dijkstra
  2016-12-14 16:43                 ` Wilco Dijkstra
  0 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2016-12-06 15:27 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches, James Greenhalgh; +Cc: nd

    

ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}                    

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-12-06 15:27               ` Wilco Dijkstra
@ 2016-12-14 16:43                 ` Wilco Dijkstra
  0 siblings, 0 replies; 21+ messages in thread
From: Wilco Dijkstra @ 2016-12-14 16:43 UTC (permalink / raw)
  To: Ramana Radhakrishnan, GCC Patches, James Greenhalgh; +Cc: nd



ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853dafcccc08842955288861ec7e7acca 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)      \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM     (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
                          + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
-             - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-                             plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
-
-  return gen_frame_mem (DImode,
-                       plus_constant (Pmode,
-                                      stack_pointer_rtx,
-                                      fp_offset
-                                      + cfun->machine->frame.saved_regs_size
-                                      - 2 * UNITS_PER_WORD));
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 21f5a6aba74d28f04b9391ba917453a4cd7de1af..7d86aa7c0cb2fdb30889badc172d2270eeadb1e5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -591,25 +591,6 @@
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
                                 (const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}                        

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2016-09-02 11:31     ` [PATCH][AArch64 - v3] " Wilco Dijkstra
                         ` (2 preceding siblings ...)
  2016-10-17 12:41       ` Wilco Dijkstra
@ 2017-01-13 17:54       ` James Greenhalgh
  2017-01-13 19:51         ` Wilco Dijkstra
  3 siblings, 1 reply; 21+ messages in thread
From: James Greenhalgh @ 2017-01-13 17:54 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Ramana Radhakrishnan, GCC Patches, nd

On Fri, Sep 02, 2016 at 11:31:04AM +0000, Wilco Dijkstra wrote:
> Ramana Radhakrishnan wrote:
> > Can you please file a PR for this and add some testcases ?  This sounds like a serious enough problem that needs to be looked at probably going back since the dawn of time.
> 
> I've created PR77455. Updated patch below:
> 
> This patch simplifies the handling of the EH return value.  We force the use of the
> frame pointer so the return location is always at FP + 8.  This means we can emit
> a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
> patterns, splitters and frame offset calculations.  The new implementation also
> fixes various bugs in aarch64_final_eh_return_addr, which does not work with
> -fomit-frame-pointer, alloca or outgoing arguments.

I've been putting off reviewing this patch for a while now, because I don't
understand enough about the current eh_return code to understand why what
you're proposing is correct.

The best way to progress this patch would be to go in to more detail as to
what the current code does, why we don't need it, and why your new
implementation is sufficient. The current code looks like it covers special
cases, and calls out DSE and CSELIB as needing special handling - your
new code doesn't.

Could you explain your patch to me assuming I know very little about the
implementation, with particular focus on why we no longer need the special
cases?

Thanks,
James

> 
> Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
> this to GCC6.x?
> 
> ChangeLog:
> 
> 2016-09-02  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	PR77455
> gcc/
> 	* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
> 	* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
> 	(EH_RETURN_HANDLER_RTX): New define.
> 	* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
> 	Force frame pointer in EH return functions.
> 	(aarch64_expand_epilogue): Add barrier for eh_return.
> 	(aarch64_final_eh_return_addr): Remove.
> 	(aarch64_eh_return_handler_rtx): New function.
> 	* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
> 	Remove.
> 	(aarch64_eh_return_handler_rtx): New prototype.
> 
> testsuite/
> 	* gcc.target/aarch64/eh_return.c: New test.
> --
 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2017-01-13 17:54       ` James Greenhalgh
@ 2017-01-13 19:51         ` Wilco Dijkstra
  2017-01-16 11:05           ` James Greenhalgh
  0 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2017-01-13 19:51 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: Ramana Radhakrishnan, GCC Patches, nd

James Greenhalgh wrote:

> I've been putting off reviewing this patch for a while now, because I don't
> understand enough about the current eh_return code to understand why what
> you're proposing is correct.
>
> The best way to progress this patch would be to go in to more detail as to
> what the current code does, why we don't need it, and why your new
> implementation is sufficient. The current code looks like it covers special
> cases, and calls out DSE and CSELIB as needing special handling - your
> new code doesn't.
> 
> Could you explain your patch to me assuming I know very little about the
> implementation, with particular focus on why we no longer need the special
> cases?

Well eh_return is far more complex than the few lines of assembly code it was
intending to avoid...

Basically the EH return feature wants the capability to either return normally or return
to a previous frame after unwinding. Oddly enough the design is to use a single shared
return sequence. The epilog is exactly like a normal epilog except that it has an extra
input register which contains the stack adjustment which must be applied after the
frame has been destroyed. An extra label is inserted before the eplilog which sets this
stack adjustment to zero, and this is the entry point for a normal return. 

An EH return updates the return address, initializes the stack adjustment and jumps
directly into the epilog (bypassing the zeroing of the adjustment). Sounds insane already?
Well it gets worse...

Given the return address is typically saved on the stack when a function makes a 
call, we now need to overwrite the saved LR outside the epilog. This poses a problem 
as we need to emit this store before the epilog is emitted, ie. before the offset of LR is 
even known. We also need to ensure the store is not removed. 

The existing implementation tries to do this by using a special eh_return pattern which
emits the store as late as possible (at least after the frame layout has been finalized).
However it doesn't do this late enough so DSE is still run after it...

To try to avoid DSE removing the store, the existing implementation tries to make the 
store alias with the load by using the same base register and offset. Given the epilog 
always reads LR from SP, it follows that whenever FP is used for the store, it will be
optimized away.

Even when it uses SP as the base for the store, SP may be updated in the epilog before
LR is read (for example alloca functions or frames with large locals or outgoing arguments
will update SP before reading LR). Worse, the offset it uses for the store is incorrect, so if
the store is not optimized, it just corrupts a random callee-save rather than overwriting LR.

So basically to conclude the existing implementation only works for one specific case
where there are a specific number of callee-saves, no floating point registers, no outgoing
arguments, no alloca used, a small number of locals, and frame pointer is enabled.

The new implementation is trivially correct by forcing the use of a frame pointer so that
the location of LR is fixed, known early, and using a volatile means no optimization could
remove the store. It's also much simpler by avoiding all the workarounds using patterns,
splitters and trying to ensure the store appears not dead.

Wilco

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
  2017-01-13 19:51         ` Wilco Dijkstra
@ 2017-01-16 11:05           ` James Greenhalgh
  2017-01-16 15:01             ` [PATCH][AArch64 - v4] " Wilco Dijkstra
  0 siblings, 1 reply; 21+ messages in thread
From: James Greenhalgh @ 2017-01-16 11:05 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Ramana Radhakrishnan, GCC Patches, nd

On Fri, Jan 13, 2017 at 07:50:48PM +0000, Wilco Dijkstra wrote:
> James Greenhalgh wrote:
> 
> > I've been putting off reviewing this patch for a while now, because I don't
> > understand enough about the current eh_return code to understand why what
> > you're proposing is correct.
> >
> > The best way to progress this patch would be to go in to more detail as to
> > what the current code does, why we don't need it, and why your new
> > implementation is sufficient. The current code looks like it covers special
> > cases, and calls out DSE and CSELIB as needing special handling - your
> > new code doesn't.
> > 
> > Could you explain your patch to me assuming I know very little about the
> > implementation, with particular focus on why we no longer need the special
> > cases?
> 
> Well eh_return is far more complex than the few lines of assembly code it was
> intending to avoid...
>
> Basically the EH return feature wants the capability to either return
> normally or return to a previous frame after unwinding. Oddly enough the
> design is to use a single shared return sequence. The epilog is exactly like
> a normal epilog except that it has an extra input register which contains the
> stack adjustment which must be applied after the frame has been destroyed. An
> extra label is inserted before the eplilog which sets this stack adjustment
> to zero, and this is the entry point for a normal return. 
> 
> An EH return updates the return address, initializes the stack adjustment and
> jumps directly into the epilog (bypassing the zeroing of the adjustment).
> Sounds insane already?  Well it gets worse...
> 
> Given the return address is typically saved on the stack when a function
> makes a call, we now need to overwrite the saved LR outside the epilog. This
> poses a problem as we need to emit this store before the epilog is emitted,
> ie. before the offset of LR is even known. We also need to ensure the store
> is not removed. 
> 
> The existing implementation tries to do this by using a special eh_return
> pattern which emits the store as late as possible (at least after the frame
> layout has been finalized).  However it doesn't do this late enough so DSE is
> still run after it...
> 
> To try to avoid DSE removing the store, the existing implementation tries to
> make the store alias with the load by using the same base register and
> offset. Given the epilog always reads LR from SP, it follows that whenever FP
> is used for the store, it will be optimized away.
> 
> Even when it uses SP as the base for the store, SP may be updated in the
> epilog before LR is read (for example alloca functions or frames with large
> locals or outgoing arguments will update SP before reading LR). Worse, the
> offset it uses for the store is incorrect, so if the store is not optimized,
> it just corrupts a random callee-save rather than overwriting LR.
> 
> So basically to conclude the existing implementation only works for one
> specific case where there are a specific number of callee-saves, no floating
> point registers, no outgoing arguments, no alloca used, a small number of
> locals, and frame pointer is enabled.
> 
> The new implementation is trivially correct by forcing the use of a frame
> pointer so that the location of LR is fixed, known early, and using a
> volatile means no optimization could remove the store. It's also much simpler
> by avoiding all the workarounds using patterns, splitters and trying to
> ensure the store appears not dead.

Great, thanks for the detailed explanation. If we can borrow most of this
text and place it in the code as a comment, I think this patch will be good
to go.

How about putting this slight rewording at the top of
aarch64_eh_return_handler_rtx:

/* The EH return feature wants the capability to either return
   normally or return to a previous frame after unwinding.

   The design is to use a single shared return sequence.  The epilogue is
   exactly like a normal epilogue except that it has an extra input
   register which contains the stack adjustment which must be applied after
   the frame has been destroyed.  An extra label is inserted before the
   epilogue which sets this stack adjustment to zero, and this is the
   entry point for a normal return.

   An EH return updates the return address, initializes the stack
   adjustment and jumps directly into the epilogue (bypassing the zeroing
   of the adjustment).  But, it gets worse...

   Given the return address is typically saved on the stack when a function
   makes a call, we now need to overwrite the saved LR outside the epilogue.
   This poses a problem as we need to emit this store before the
   epilogue is emitted, i.e. before the offset of LR is even known.  We also
   need to ensure the store is not removed. 

   Previously, we did the by using a special eh_return pattern which
   would emits the store as late as possible (at least after the frame
   layout has been finalized).  However, DSE would still be run after this
   expansion so it would not be safe.

   To try to avoid DSE removing the store, we previously tried to
   make the store alias with the load by using the same base register
   and offset.  Given the epilogue always reads LR from SP, it follows
   that whenever FP is used for the store, it will be optimized away.

   Even when we used SP as the base for the store, SP may be updated in the
   epilogue before LR is read (for example alloca functions or frames with
   large locals or outgoing arguments will update SP before reading LR).

   This simpler implementation is trivially correct as it forces the use
   of a frame pointer for eh_return functions so that the location of LR
   is fixed and known early, marking the store volatile means no optimization
   is permitted to remove the store.  It avoids workarounds using
   patterns, splitters and other tricks which try to ensure the store
   does not appear dead.  */

I'd then be tempted to repeat this text directly above
MEM_VOLATILE_P (tmp) = true;

  /* Marking the store volatile means no optimization is permitted to
     remove the store.  It avoids workarounds using patterns, splitters
     and other tricks which try to ensure the store does not appear
     dead.  */

If you have other patches which have been stuck in review for a long time,
detailed explanations like this can really help speed up the process of
reviewing and approving them.

Thanks,
James

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v4] Simplify eh_return implementation
  2017-01-16 11:05           ` James Greenhalgh
@ 2017-01-16 15:01             ` Wilco Dijkstra
  2017-01-17 18:04               ` James Greenhalgh
  0 siblings, 1 reply; 21+ messages in thread
From: Wilco Dijkstra @ 2017-01-16 15:01 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: Ramana Radhakrishnan, GCC Patches, nd

Here is the updated version:

This patch simplifies the handling of the EH return value.  We force the use of the
frame pointer so the return location is always at FP + 8.  This means we can emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2017-01-16  Wilco Dijkstra  <wdijkstr@arm.com>

        PR77455
gcc/
        * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
        * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
        (EH_RETURN_HANDLER_RTX): New define.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
        Force frame pointer in EH return functions.
        (aarch64_expand_epilogue): Add barrier for eh_return.
        (aarch64_final_eh_return_addr): Remove.
        (aarch64_eh_return_handler_rtx): New function.
        * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
        Remove.
        (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
        * gcc.target/aarch64/eh_return.c: New test.
--

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..602f54f934a9b62e782154e7f19cafe3b1bf0481 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 5aee9eb5b42d200343484ac4d18d650d26027078..924376223f52e9f614ea2b4bc12bd24d2ab2720a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)	\
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM	(R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4e5fad912ba24ffeca36e2d601b67e09c005bb54..d2585dc6eb14fc82c952ca8dbeda445d249fdb9b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2762,6 +2762,10 @@ aarch64_frame_pointer_required (void)
       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
     return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3623,7 +3627,8 @@ aarch64_expand_epilogue (bool for_sibcall)
 			 + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+      || crtl->calls_eh_return)
     {
       emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
       need_barrier_p = false;
@@ -3691,52 +3696,40 @@ aarch64_expand_epilogue (bool for_sibcall)
     emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
-rtx
-aarch64_final_eh_return_addr (void)
-{
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
+/* Implement EH_RETURN_HANDLER_RTX.  EH returns need to either return
+   normally or return to a previous frame after unwinding.
 
-  fp_offset = cfun->machine->frame.frame_size
-	      - cfun->machine->frame.hard_fp_offset;
+   An EH return uses a single shared return sequence.  The epilogue is
+   exactly like a normal epilogue except that it has an extra input
+   register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
+   that must be applied after the frame has been destroyed.  An extra label
+   is inserted before the epilogue which initializes this register to zero,
+   and this is the entry point for a normal return.
 
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
+   An actual EH return updates the return address, initializes the stack
+   adjustment and jumps directly into the epilogue (bypassing the zeroing
+   of the adjustment).  Since the return address is typically saved on the
+   stack when a function makes a call, the saved LR must be updated outside
+   the epilogue.
 
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
-     result in a store to save LR introduced by builtin_eh_return () being
-     incorrectly deleted because the alias is not detected.
-     So in the calculation of the address to copy the exception unwinding
-     return address to, we note 2 cases.
-     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
-     we return a SP-relative location since all the addresses are SP-relative
-     in this case.  This prevents the store from being optimized away.
-     If the fp_offset is not 0, then the addresses will be FP-relative and
-     therefore we return a FP-relative location.  */
+   This poses problems as the store is generated well before the epilogue,
+   so the offset of LR is not known yet.  Also optimizations will remove the
+   store as it appears dead, even after the epilogue is generated (as the
+   base or offset for loading LR is different in many cases).
 
-  if (frame_pointer_needed)
-    {
-      if (fp_offset)
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
-      else
-        return gen_frame_mem (DImode,
-			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
-    }
-
-  /* If FP is not needed, we calculate the location of LR, which would be
-     at the top of the saved registers block.  */
+   To avoid these problems this implementation forces the frame pointer
+   in eh_return functions so that the location of LR is fixed and known early.
+   It also marks the store volatile, so no optimization is permitted to
+   remove the store.  */
+rtx
+aarch64_eh_return_handler_rtx (void)
+{
+  rtx tmp = gen_frame_mem (Pmode,
+    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
 
-  return gen_frame_mem (DImode,
-			plus_constant (Pmode,
-				       stack_pointer_rtx,
-				       fp_offset
-				       + cfun->machine->frame.saved_regs_size
-				       - 2 * UNITS_PER_WORD));
+  /* Mark the store volatile, so no optimization is permitted to remove it.  */
+  MEM_VOLATILE_P (tmp) = true;
+  return tmp;
 }
 
 /* Output code to add DELTA to the first argument, and then jump
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index bde42317f1bfd91a9d38a4cfa94d4cedd5246003..b3be106ac6ff1fb77bdc05cb4b0e57102787b87c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -592,25 +592,6 @@ (define_insn "simple_return"
   [(set_attr "type" "branch")]
 )
 
-(define_insn "eh_return"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
-    UNSPECV_EH_RETURN)]
-  ""
-  "#"
-  [(set_attr "type" "branch")]
-
-)
-
-(define_split
-  [(unspec_volatile [(match_operand:DI 0 "register_operand" "")]
-    UNSPECV_EH_RETURN)]
-  "reload_completed"
-  [(set (match_dup 1) (match_dup 0))]
-  {
-    operands[1] = aarch64_final_eh_return_addr ();
-  }
-)
-
 (define_insn "*cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
 				(const_int 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return.c b/gcc/testsuite/gcc.target/aarch64/eh_return.c
new file mode 100644
index 0000000000000000000000000000000000000000..32179488085ed4c84aa77007565c875d09c4197c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int val, test, failed;
+
+int main (void);
+
+void
+eh0 (void *p)
+{
+  val = (int)(long)p & 7;
+  if (val)
+    abort ();
+}
+
+void
+eh1 (void *p, int x)
+{
+  void *q = __builtin_alloca (x);
+  eh0 (q);
+  __builtin_eh_return (0, p);
+}
+
+void
+eh2a (int a,int b,int c,int d,int e,int f,int g,int h, void *p)
+{
+  val = a + b + c + d + e + f + g + h +  (int)(long)p & 7;
+}
+
+void
+eh2 (void *p)
+{
+  eh2a (val, val, val, val, val, val, val, val, p);
+  __builtin_eh_return (0, p);
+}
+
+
+void
+continuation (void)
+{
+  test++;
+  main ();
+}
+
+void
+fail (void)
+{
+  failed = 1;
+  printf ("failed\n");
+  continuation ();
+}
+
+void
+do_test1 (void)
+{
+  if (!val)
+    eh1 (continuation, 100);
+  fail ();
+}
+
+void
+do_test2 (void)
+{
+  if (!val)
+    eh2 (continuation);
+  fail ();
+}
+
+int
+main (void)
+{
+  if (test == 0)
+    do_test1 ();
+  if (test == 1)
+    do_test2 ();
+  if (failed || test != 2)
+    exit (1);
+  exit (0);
+}

    

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH][AArch64 - v4] Simplify eh_return implementation
  2017-01-16 15:01             ` [PATCH][AArch64 - v4] " Wilco Dijkstra
@ 2017-01-17 18:04               ` James Greenhalgh
  0 siblings, 0 replies; 21+ messages in thread
From: James Greenhalgh @ 2017-01-17 18:04 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Ramana Radhakrishnan, GCC Patches, nd

On Mon, Jan 16, 2017 at 03:00:48PM +0000, Wilco Dijkstra wrote:
> Here is the updated version:
> 
> This patch simplifies the handling of the EH return value.  We force the use of the
> frame pointer so the return location is always at FP + 8.  This means we can emit
> a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
> patterns, splitters and frame offset calculations.  The new implementation also
> fixes various bugs in aarch64_final_eh_return_addr, which does not work with
> -fomit-frame-pointer, alloca or outgoing arguments.
> 
> Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
> this to GCC6.x?

This is OK for trunk. I think it would be useful on GCC 6, but give it a few
days on trunk to wait for fallout before backporting.

Thanks,
James

> 
> ChangeLog:
> 
> 2017-01-16  Wilco Dijkstra  <wdijkstr@arm.com>
> 
>         PR77455
> gcc/
>         * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
>         * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
>         (EH_RETURN_HANDLER_RTX): New define.
>         * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
>         Force frame pointer in EH return functions.
>         (aarch64_expand_epilogue): Add barrier for eh_return.
>         (aarch64_final_eh_return_addr): Remove.
>         (aarch64_eh_return_handler_rtx): New function.
>         * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
>         Remove.
>         (aarch64_eh_return_handler_rtx): New prototype.
> 
> testsuite/
>         * gcc.target/aarch64/eh_return.c: New test.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2017-01-17 18:01 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-04 11:21 [PATCH][AArch64] Simplify eh_return implementation Wilco Dijkstra
2016-08-10 16:26 ` [PATCH][AArch64 - v2] " Wilco Dijkstra
2016-08-23 14:48   ` Wilco Dijkstra
2016-08-26 15:03     ` Jiong Wang
2016-08-26 18:47       ` Wilco Dijkstra
2016-09-01 12:33     ` Wilco Dijkstra
2016-09-01 13:40   ` Ramana Radhakrishnan
2016-09-02 11:31     ` [PATCH][AArch64 - v3] " Wilco Dijkstra
2016-09-12 14:51       ` Wilco Dijkstra
2016-09-21 14:50       ` Wilco Dijkstra
2016-10-17 12:41       ` Wilco Dijkstra
2016-10-25  9:48         ` Wilco Dijkstra
2016-11-02 16:49           ` Wilco Dijkstra
2016-11-14 13:05             ` Wilco Dijkstra
2016-12-06 15:27               ` Wilco Dijkstra
2016-12-14 16:43                 ` Wilco Dijkstra
2017-01-13 17:54       ` James Greenhalgh
2017-01-13 19:51         ` Wilco Dijkstra
2017-01-16 11:05           ` James Greenhalgh
2017-01-16 15:01             ` [PATCH][AArch64 - v4] " Wilco Dijkstra
2017-01-17 18:04               ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).