public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v3] aarch64: Fix normal returns inside functions which use eh_returns [PR114843]
@ 2024-05-20 14:43 Wilco Dijkstra
  2024-05-21 15:00 ` Richard Sandiford
  0 siblings, 1 reply; 3+ messages in thread
From: Wilco Dijkstra @ 2024-05-20 14:43 UTC (permalink / raw)
  To: quic_apinski; +Cc: GCC Patches

Hi Andrew,

A few comments on the implementation, I think it can be simplified a lot:

> +++ b/gcc/config/aarch64/aarch64.h
> @@ -700,8 +700,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
> #define DWARF2_UNWIND_INFO 1
>  
>  /* Use R0 through R3 to pass exception handling information.  */
> +#define EH_RETURN_DATA_REGISTERS_N 4
>  #define EH_RETURN_DATA_REGNO(N) \
> -  ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
> +  ((N) < EH_RETURN_DATA_REGISTERS_N ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
 
It would be useful to add a macro IS_EH_RETURN_REGNUM(regnum) that just checks
the range R0_REGNUM to R0_REGNUM + EH_RETURN_DATA_REGISTERS_N.

> @@ -929,6 +928,7 @@ struct GTY (()) aarch64_frame
>      outgoing arguments) of each register save slot, or -2 if no save is
>      needed.  */
>   poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
> +  bool eh_return_allocated[EH_RETURN_DATA_REGISTERS_N];

This doesn't make much sense - besides X0-X3, we also need X5 and X6 for eh_return.
If these or any of the other temporaries used by epilog are callee-saved somehow,
things are going horribly wrong already... So what do we gain by doing this?


> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -7792,6 +7792,7 @@ aarch64_layout_frame (void)
> 
>  #define SLOT_NOT_REQUIRED (-2)
>  #define SLOT_REQUIRED     (-1)
> +#define SLOT_EH_RETURN_REQUIRED     (-3)
 
I don't see a need for this.


> @@ -7949,6 +7950,18 @@ aarch64_layout_frame (void)
>         stopping it from being individually shrink-wrapped.  */
>      allocate_gpr_slot (R30_REGNUM);
>  
> +  /* Allocate the eh_return first. */
> +  if (crtl->calls_eh_return)
> +    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
> +      {
> +	int realregno = EH_RETURN_DATA_REGNO (regno);
> +	if (known_eq (frame.reg_offset[realregno], SLOT_EH_RETURN_REQUIRED))
> +	  {
> +	    frame.eh_return_allocated[regno] = true;
> +	    allocate_gpr_slot (realregno);
> +	  }
> +      }

This change is unnecessary if we just mark the slots with SLOT_REQUIRED.


> @@ -8035,6 +8048,23 @@ aarch64_layout_frame (void)
>   frame.wb_pop_candidate1 = frame.wb_push_candidate1;
>   frame.wb_pop_candidate2 = frame.wb_push_candidate2;
>  
> +  /* EH data registers are not pop canidates. */
> +  if (crtl->calls_eh_return)
> +    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)> 
> +      {
> +	if (frame.eh_return_allocated[regno]
> +	    && frame.wb_pop_candidate1 == EH_RETURN_DATA_REGNO (regno))
> +	{
> +	  frame.wb_pop_candidate1 = frame.wb_pop_candidate2;
> +	  frame.wb_pop_candidate2 = INVALID_REGNUM;
> +	}
> +	if (frame.eh_return_allocated[regno]
> +	    && frame.wb_pop_candidate2 == EH_RETURN_DATA_REGNO (regno))
> +	{
> +	  frame.wb_pop_candidate2 = INVALID_REGNUM;
> +	}
> +      }

This is unnecessary since we can just avoid making them push candidates
if there is no frame chain, eg:

if ((!crtl->calls_eh_return || frame.emit_frame_chain) && !push_regs.empty ()
      && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))


@@ -8681,6 +8712,20 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
       if (frame.is_scs_enabled && regno == LR_REGNUM)
 	return true;
 
+      /* Skip the eh return data registers if we are
+	 returning normally rather than via eh_return. */
+      if (!was_eh_return && crtl->calls_eh_return)
+	{
+	  for (unsigned ehregno = 0;
+	       EH_RETURN_DATA_REGNO (ehregno) != INVALID_REGNUM;
+	       ehregno++)
+	    {
+	      if (EH_RETURN_DATA_REGNO (ehregno) == regno
+		  && frame.eh_return_allocated[ehregno])
+		return true;
+	    }
+	}
+

So this could be something like:

      if (!was_eh_return && crtl->calls_eh_return && IS_EH_RETURN_REGNUM (regno))
        return true;
 
Cheers,
Wilco

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] aarch64: Fix normal returns inside functions which use eh_returns [PR114843]
  2024-05-20 14:43 [PATCH v3] aarch64: Fix normal returns inside functions which use eh_returns [PR114843] Wilco Dijkstra
@ 2024-05-21 15:00 ` Richard Sandiford
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Sandiford @ 2024-05-21 15:00 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: quic_apinski, GCC Patches

Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi Andrew,
>
> A few comments on the implementation, I think it can be simplified a lot:

FWIW, I agree with Wilco's comments, except:

>> +++ b/gcc/config/aarch64/aarch64.h
>> @@ -700,8 +700,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
>> #define DWARF2_UNWIND_INFO 1
>>  
>>  /* Use R0 through R3 to pass exception handling information.  */
>> +#define EH_RETURN_DATA_REGISTERS_N 4
>>  #define EH_RETURN_DATA_REGNO(N) \
>> -  ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
>> +  ((N) < EH_RETURN_DATA_REGISTERS_N ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
>  
> It would be useful to add a macro IS_EH_RETURN_REGNUM(regnum) that just checks
> the range R0_REGNUM to R0_REGNUM + EH_RETURN_DATA_REGISTERS_N.

I've just pushed a patch that adds a global eh_return_data_regs set,
so I think we can test that instead.

>> @@ -929,6 +928,7 @@ struct GTY (()) aarch64_frame
>>      outgoing arguments) of each register save slot, or -2 if no save is
>>      needed.  */
>>   poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
>> +  bool eh_return_allocated[EH_RETURN_DATA_REGISTERS_N];
>
> This doesn't make much sense - besides X0-X3, we also need X5 and X6 for eh_return.
> If these or any of the other temporaries used by epilog are callee-saved somehow,
> things are going horribly wrong already... So what do we gain by doing this?
>
>
>> +++ b/gcc/config/aarch64/aarch64.cc
>> @@ -7792,6 +7792,7 @@ aarch64_layout_frame (void)
>> 
>>  #define SLOT_NOT_REQUIRED (-2)
>>  #define SLOT_REQUIRED     (-1)
>> +#define SLOT_EH_RETURN_REQUIRED     (-3)
>  
> I don't see a need for this.
>
>
>> @@ -7949,6 +7950,18 @@ aarch64_layout_frame (void)
>>         stopping it from being individually shrink-wrapped.  */
>>      allocate_gpr_slot (R30_REGNUM);
>>  
>> +  /* Allocate the eh_return first. */
>> +  if (crtl->calls_eh_return)
>> +    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
>> +      {
>> +	int realregno = EH_RETURN_DATA_REGNO (regno);
>> +	if (known_eq (frame.reg_offset[realregno], SLOT_EH_RETURN_REQUIRED))
>> +	  {
>> +	    frame.eh_return_allocated[regno] = true;
>> +	    allocate_gpr_slot (realregno);
>> +	  }
>> +      }
>
> This change is unnecessary if we just mark the slots with SLOT_REQUIRED.

Also, is it necessary to allocate EH data registers first?

>> @@ -8035,6 +8048,23 @@ aarch64_layout_frame (void)
>>   frame.wb_pop_candidate1 = frame.wb_push_candidate1;
>>   frame.wb_pop_candidate2 = frame.wb_push_candidate2;
>>  
>> +  /* EH data registers are not pop canidates. */
>> +  if (crtl->calls_eh_return)
>> +    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)> 
>> +      {
>> +	if (frame.eh_return_allocated[regno]
>> +	    && frame.wb_pop_candidate1 == EH_RETURN_DATA_REGNO (regno))
>> +	{
>> +	  frame.wb_pop_candidate1 = frame.wb_pop_candidate2;
>> +	  frame.wb_pop_candidate2 = INVALID_REGNUM;
>> +	}
>> +	if (frame.eh_return_allocated[regno]
>> +	    && frame.wb_pop_candidate2 == EH_RETURN_DATA_REGNO (regno))
>> +	{
>> +	  frame.wb_pop_candidate2 = INVALID_REGNUM;
>> +	}
>> +      }
>
> This is unnecessary since we can just avoid making them push candidates
> if there is no frame chain, eg:
>
> if ((!crtl->calls_eh_return || frame.emit_frame_chain) && !push_regs.empty ()
>       && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))

I agree we should do the check here (and similarly for the second register),
rather than fixing it up later.  But IMO we should test the register directly:

  if (!push_regs.empty ()
      && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp)
      && (!crtl->calls_eh_return
	  || !TEST_HARD_REG_BIT (eh_return_data_regs, push_regs[0])))

In some ways it seems unfortunate that we're generating two different
copies of the epilogue in order to skip two LDPs that (with a bit of
work) could easily be done before entering a combined epilogue.
But we already have a branch on EH_RETURN_TAKEN_RTX as well,
so maybe this is the tipping point at which duplication is worthwhile.

Thanks,
Richard

> @@ -8681,6 +8712,20 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
>        if (frame.is_scs_enabled && regno == LR_REGNUM)
>  	return true;
>  
> +      /* Skip the eh return data registers if we are
> +	 returning normally rather than via eh_return. */
> +      if (!was_eh_return && crtl->calls_eh_return)
> +	{
> +	  for (unsigned ehregno = 0;
> +	       EH_RETURN_DATA_REGNO (ehregno) != INVALID_REGNUM;
> +	       ehregno++)
> +	    {
> +	      if (EH_RETURN_DATA_REGNO (ehregno) == regno
> +		  && frame.eh_return_allocated[ehregno])
> +		return true;
> +	    }
> +	}
> +
>
> So this could be something like:
>
>       if (!was_eh_return && crtl->calls_eh_return && IS_EH_RETURN_REGNUM (regno))
>         return true;
>  
> Cheers,
> Wilco

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v3] aarch64: Fix normal returns inside functions which use eh_returns [PR114843]
@ 2024-05-05 23:58 Andrew Pinski
  0 siblings, 0 replies; 3+ messages in thread
From: Andrew Pinski @ 2024-05-05 23:58 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

The problem here is that on a normal return path, we still restore the
eh data return when we should not.
Instead of one return path in the case of eh_return, this changes over
to use multiple returns pathes just like a normal function.
On the normal path (non-eh return), we need to skip restoring of the eh
return data registers.

This fixes the code generation of _Unwind_RaiseException where the return value
was currupted.

Note this adds some testcases which might fail on some targets.
I know of the following targets will fail also:
arm is recorded as PR 114847.
powerpc is recorded as PR 114846.

Build and tested for aarch64-linux-gnu with no regressions.

Changes in:
* v2: Fix logical error in aarch64_pop_regs which was a premature optimization.
	Check regno1 and regno2 independently now.
	Also add eh_return-5.c which tests that case.
* v3: Instead of redoing the detection of the eh_return register store off
	to frame.eh_return_allocated. Also don't consider eh_return data
	registers as pop canidates.

Note v2 was not submitted.

	PR target/114843

gcc/ChangeLog:

	* config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): New
	prototype.
	* config/aarch64/aarch64.h (EH_RETURN_DATA_REGISTERS_N): New define.
	(EH_RETURN_DATA_REGNO): Use EH_RETURN_DATA_REGISTERS_N instead of hard coding 4.
	(aarch64_frame): Add eh_return_allocated.
	* config/aarch64/aarch64.cc (aarch64_restore_callee_saves): Skip
	over the eh return data regs if not eh return.
	(aarch64_expand_epilogue): New function, pass false.
	(aarch64_expand_epilogue): Add was_eh_return argument.
	Update calls to aarch64_restore_callee_saves and aarch64_pop_regs.
	For eh_returns, update the sp and do an indirect jump.
	Don't check EH_RETURN_TAKEN_RTX any more.
	* config/aarch64/aarch64.h (EH_RETURN_TAKEN_RTX): Delete.
	* config/aarch64/aarch64.md (eh_return): New define_expand.
	(eh_return_internal): New pattern for eh_returns.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/eh_return-3.c: Update testcase.
	* gcc.c-torture/execute/eh_return-1.c: New test.
	* gcc.c-torture/execute/eh_return-2.c: New test.
	* gcc.c-torture/execute/eh_return-3.c: New test.
	* gcc.c-torture/execute/eh_return-4.c: New test.
	* gcc.c-torture/execute/eh_return-5.c: New test.
	* gcc.target/aarch64/eh_return-4.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/config/aarch64/aarch64-protos.h           |  1 +
 gcc/config/aarch64/aarch64.cc                 | 78 ++++++++++++++-----
 gcc/config/aarch64/aarch64.h                  | 14 ++--
 gcc/config/aarch64/aarch64.md                 | 24 ++++++
 .../gcc.c-torture/execute/eh_return-1.c       | 20 +++++
 .../gcc.c-torture/execute/eh_return-2.c       | 23 ++++++
 .../gcc.c-torture/execute/eh_return-3.c       | 25 ++++++
 .../gcc.c-torture/execute/eh_return-4.c       | 25 ++++++
 .../gcc.c-torture/execute/eh_return-5.c       | 24 ++++++
 .../gcc.target/aarch64/eh_return-3.c          | 12 ++-
 .../gcc.target/aarch64/eh_return-4.c          | 32 ++++++++
 11 files changed, 244 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-3.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-4.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/eh_return-4.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 42639e9efcf..efe86d52873 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -904,6 +904,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
 const char * aarch64_output_probe_stack_range (rtx, rtx);
 const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx);
 void aarch64_err_no_fpadvsimd (machine_mode);
+void aarch64_expand_epilogue (rtx_call_insn *, bool);
 void aarch64_expand_epilogue (rtx_call_insn *);
 rtx aarch64_ptrue_all (unsigned int);
 opt_machine_mode aarch64_ptrue_all_mode (rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 662ff5a9b0c..afbe4eeb340 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -7792,6 +7792,7 @@ aarch64_layout_frame (void)
 
 #define SLOT_NOT_REQUIRED (-2)
 #define SLOT_REQUIRED     (-1)
+#define SLOT_EH_RETURN_REQUIRED     (-3)
 
   frame.wb_push_candidate1 = INVALID_REGNUM;
   frame.wb_push_candidate2 = INVALID_REGNUM;
@@ -7805,7 +7806,7 @@ aarch64_layout_frame (void)
   /* ... that includes the eh data registers (if needed)...  */
   if (crtl->calls_eh_return)
     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
-      frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_REQUIRED;
+      frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_EH_RETURN_REQUIRED;
 
   /* ... and any callee saved register that dataflow says is live.  */
   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
@@ -7949,6 +7950,18 @@ aarch64_layout_frame (void)
        stopping it from being individually shrink-wrapped.  */
     allocate_gpr_slot (R30_REGNUM);
 
+  /* Allocate the eh_return first. */
+  if (crtl->calls_eh_return)
+    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
+      {
+	int realregno = EH_RETURN_DATA_REGNO (regno);
+	if (known_eq (frame.reg_offset[realregno], SLOT_EH_RETURN_REQUIRED))
+	  {
+	    frame.eh_return_allocated[regno] = true;
+	    allocate_gpr_slot (realregno);
+	  }
+      }
+
   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
     if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
       allocate_gpr_slot (regno);
@@ -8035,6 +8048,23 @@ aarch64_layout_frame (void)
   frame.wb_pop_candidate1 = frame.wb_push_candidate1;
   frame.wb_pop_candidate2 = frame.wb_push_candidate2;
 
+  /* EH data registers are not pop canidates. */
+  if (crtl->calls_eh_return)
+    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
+      {
+	if (frame.eh_return_allocated[regno]
+	    && frame.wb_pop_candidate1 == EH_RETURN_DATA_REGNO (regno))
+	{
+	  frame.wb_pop_candidate1 = frame.wb_pop_candidate2;
+	  frame.wb_pop_candidate2 = INVALID_REGNUM;
+	}
+	if (frame.eh_return_allocated[regno]
+	    && frame.wb_pop_candidate2 == EH_RETURN_DATA_REGNO (regno))
+	{
+	  frame.wb_pop_candidate2 = INVALID_REGNUM;
+	}
+      }
+
   /* Shadow call stack only deals with functions where the LR is pushed
      onto the stack and without specifying the "no_sanitize" attribute
      with the argument "shadow-call-stack".  */
@@ -8662,7 +8692,8 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
 
 static void
 aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
-			      array_slice<unsigned int> regs, rtx *cfi_ops)
+			      array_slice<unsigned int> regs, rtx *cfi_ops,
+			      bool was_eh_return = false)
 {
   aarch64_frame &frame = cfun->machine->frame;
   poly_int64 offset;
@@ -8681,6 +8712,20 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
       if (frame.is_scs_enabled && regno == LR_REGNUM)
 	return true;
 
+      /* Skip the eh return data registers if we are
+	 returning normally rather than via eh_return. */
+      if (!was_eh_return && crtl->calls_eh_return)
+	{
+	  for (unsigned ehregno = 0;
+	       EH_RETURN_DATA_REGNO (ehregno) != INVALID_REGNUM;
+	       ehregno++)
+	    {
+	      if (EH_RETURN_DATA_REGNO (ehregno) == regno
+		  && frame.eh_return_allocated[ehregno])
+		return true;
+	    }
+	}
+
       return false;
     };
 
@@ -9805,6 +9850,11 @@ aarch64_use_return_insn_p (void)
 
   return known_eq (cfun->machine->frame.frame_size, 0);
 }
+void
+aarch64_expand_epilogue (rtx_call_insn *sibcall)
+{
+  aarch64_expand_epilogue (sibcall, false);
+}
 
 /* Generate the epilogue instructions for returning from a function.
    This is almost exactly the reverse of the prolog sequence, except
@@ -9812,7 +9862,7 @@ aarch64_use_return_insn_p (void)
    from a deallocated stack, and we optimize the unwind records by
    emitting them all together if possible.  */
 void
-aarch64_expand_epilogue (rtx_call_insn *sibcall)
+aarch64_expand_epilogue (rtx_call_insn *sibcall, bool was_eh_return)
 {
   aarch64_frame &frame = cfun->machine->frame;
   poly_int64 initial_adjust = frame.initial_adjust;
@@ -9919,7 +9969,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
      restore x30, we don't need to restore x30 again in the traditional
      way.  */
   aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
-				frame.saved_gprs, &cfi_ops);
+				frame.saved_gprs, &cfi_ops, was_eh_return);
 
   if (need_barrier_p)
     aarch64_emit_stack_tie (stack_pointer_rtx);
@@ -9968,26 +10018,12 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
     }
 
   /* Stack adjustment for exception handler.  */
-  if (crtl->calls_eh_return && !sibcall)
-    {
-      /* If the EH_RETURN_TAKEN_RTX flag is set then we need
-	 to unwind the stack and jump to the handler, otherwise
-	 skip this eh_return logic and continue with normal
-	 return after the label.  We have already reset the CFA
-	 to be SP; letting the CFA move during this adjustment
-	 is just as correct as retaining the CFA from the body
-	 of the function.  Therefore, do nothing special.  */
-      rtx_code_label *label = gen_label_rtx ();
-      rtx x = aarch64_gen_compare_zero_and_branch (EQ, EH_RETURN_TAKEN_RTX,
-						   label);
-      rtx jump = emit_jump_insn (x);
-      JUMP_LABEL (jump) = label;
-      LABEL_NUSES (label)++;
+  if (was_eh_return && !sibcall)
+    {
       emit_insn (gen_add2_insn (stack_pointer_rtx,
 				EH_RETURN_STACKADJ_RTX));
       emit_jump_insn (gen_indirect_jump (EH_RETURN_HANDLER_RTX));
-      emit_barrier ();
-      emit_label (label);
+      return;
     }
 
   /* We prefer to emit the combined return/authenticate instruction RETAA,
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 4fa1dfc7906..bfcd5e84510 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -700,8 +700,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
 #define DWARF2_UNWIND_INFO 1
 
 /* Use R0 through R3 to pass exception handling information.  */
+#define EH_RETURN_DATA_REGISTERS_N 4
 #define EH_RETURN_DATA_REGNO(N) \
-  ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
+  ((N) < EH_RETURN_DATA_REGISTERS_N ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
 
 /* Select a format to encode pointers in exception handling data.  */
 #define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
@@ -723,10 +724,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
 /* Output assembly strings after .cfi_startproc is emitted.  */
 #define ASM_POST_CFI_STARTPROC  aarch64_post_cfi_startproc
 
-/* For EH returns X4 is a flag that is set in the EH return
-   code paths and then X5 and X6 contain the stack adjustment
+/* For EH returns X5 and X6 contain the stack adjustment
    and return address respectively.  */
-#define EH_RETURN_TAKEN_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
 #define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R5_REGNUM)
 #define EH_RETURN_HANDLER_RTX	gen_rtx_REG (Pmode, R6_REGNUM)
 
@@ -929,6 +928,7 @@ struct GTY (()) aarch64_frame
      outgoing arguments) of each register save slot, or -2 if no save is
      needed.  */
   poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
+  bool eh_return_allocated[EH_RETURN_DATA_REGISTERS_N];
 
   /* The list of GPRs, FPRs and predicate registers that have nonnegative
      entries in reg_offset.  The registers are listed in order of
@@ -1015,10 +1015,12 @@ struct GTY (()) aarch64_frame
      eventually.  The initial value of a pop candidate is copied from its
      corresponding push candidate.
 
-     Currently, different pop candidates are only used for shadow call
+     Different pop candidates are used for shadow call
      stack.  When "-fsanitize=shadow-call-stack" is specified, we replace
      x30 in the pop candidate with INVALID_REGNUM to ensure that x30 is
-     not popped twice.  */
+     not popped twice.
+
+     Also eh_return data registers are not pop candidates. */
   unsigned wb_push_candidate1;
   unsigned wb_push_candidate2;
   unsigned wb_pop_candidate1;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 046a249475d..a6cedd0f1b8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1058,6 +1058,30 @@ (define_insn "simple_return"
    (set_attr "sls_length" "retbr")]
 )
 
+;; This is used in compiling the unwind routines.
+(define_expand "eh_return"
+  [(use (match_operand 0 "general_operand"))]
+  ""
+{
+  emit_move_insn (EH_RETURN_HANDLER_RTX, operands[0]);
+
+  emit_jump_insn (gen_eh_return_internal ());
+  emit_barrier ();
+  DONE;
+})
+
+(define_insn_and_split "eh_return_internal"
+  [(eh_return)]
+  ""
+  "#"
+  "epilogue_completed"
+  [(const_int 0)]
+{
+  aarch64_expand_epilogue (nullptr, true);
+  DONE;
+})
+
+
 (define_insn "aarch64_cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
 				(const_int 0))
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c
new file mode 100644
index 00000000000..3928a58d841
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c
@@ -0,0 +1,20 @@
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered. */
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  if (*a == 5)
+    return 5;
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 0, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c
new file mode 100644
index 00000000000..41fb48c99f9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c
@@ -0,0 +1,23 @@
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered, also use alloca. */
+__attribute__((noipa))
+void g(void*) {}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  g(__builtin_alloca(offset));
+  if (*a == 5)
+    return 5;
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 67, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c
new file mode 100644
index 00000000000..dfd73d35a43
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c
@@ -0,0 +1,25 @@
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered, also use large stack. */
+__attribute__((noipa))
+void g(void *a) {}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  int h[10245];
+  g(h);
+  if (*a == 5)
+    return 5;
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 67, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c
new file mode 100644
index 00000000000..2f1126ba7c5
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c
@@ -0,0 +1,25 @@
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered, using sibcall. */
+__attribute__((noipa))
+int g()
+{
+   return 5;
+}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  if (*a == 5)
+    return g();
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 67, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-5.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-5.c
new file mode 100644
index 00000000000..50b1b181b09
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-5.c
@@ -0,0 +1,24 @@
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered. */
+__attribute__((noipa))
+void g(void){}
+int arr[6]={0,1,2,3,4,5};
+__attribute__((noipa))
+int f(long *offset, void *handler)
+{
+  g();
+  if (*offset < 5)
+    return arr[*offset];
+  __builtin_eh_return (*offset, handler);
+}
+
+int main()
+{
+  long o = 1;
+  if (f(&o, 0) != 1)
+     __builtin_abort();
+  return 0; 
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return-3.c b/gcc/testsuite/gcc.target/aarch64/eh_return-3.c
index d180fa7c455..2ed9b53b3f1 100644
--- a/gcc/testsuite/gcc.target/aarch64/eh_return-3.c
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return-3.c
@@ -7,11 +7,7 @@
 **	hint	25 // paciasp
 **	...
 **	cbz	w2, .*
-**	mov	x4, 0
-**	...
-**	cbz	x4, .*
-**	add	sp, sp, x5
-**	br	x6
+**	add	sp, sp, 32
 ** (
 **	hint	29 // autiasp
 **	ret
@@ -19,9 +15,11 @@
 **	retaa
 ** )
 **	mov	x5, x0
-**	mov	x4, 1
 **	mov	x6, x1
-**	b	.*
+**	...
+**	add	sp, sp, 32
+**	add	sp, sp, x5
+**	br	x6
 */
 void
 foo (unsigned long off, void *handler, int c)
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return-4.c b/gcc/testsuite/gcc.target/aarch64/eh_return-4.c
new file mode 100644
index 00000000000..7c58765dab2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return-4.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=pac-ret+leaf -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+**foo:
+**	hint	25 // paciasp
+**	sub	sp, sp, #32
+**	...
+**	mov	x5, x0
+**	cbz	w2, .*
+**	mov	w0, 5
+**	add	sp, sp, 32
+** (
+**	hint	29 // autiasp
+**	ret
+** |
+**	retaa
+** )
+**	mov	x6, x1
+**	...
+**	add	sp, sp, 32
+**	add	sp, sp, x5
+**	br	x6
+*/
+int
+foo (unsigned long off, void *handler, int c)
+{
+  if (c)
+    return 5;
+  __builtin_eh_return (off, handler);
+}
-- 
2.43.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-05-21 15:00 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-20 14:43 [PATCH v3] aarch64: Fix normal returns inside functions which use eh_returns [PR114843] Wilco Dijkstra
2024-05-21 15:00 ` Richard Sandiford
  -- strict thread matches above, loose matches on Subject: below --
2024-05-05 23:58 Andrew Pinski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).