public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] ELFv2 ABI 3/8: Track single CR fields in DWARF CFI
@ 2013-11-11 14:44 Ulrich Weigand
  0 siblings, 0 replies; only message in thread
From: Ulrich Weigand @ 2013-11-11 14:44 UTC (permalink / raw)
  To: gcc-patches

Hello,

this patch adds another ELFv2 ABI feature: explicit tracking of CR fields
in DWARF CFI.

In the current ABI, DWARF CFI contains only a single record describing
the save location of the whole CR register.  It is implicit that all (or
at least all call-clobbered) fields are present at that location.

Now, if you use the instructions that save and restore the whole CR
at once, this approach might seem reasonable.  Unfortunately, with
current POWER processors, those instructions tend to be significantly
slower than those that access only single CR fields.

In particular in routines where only one or two CR fields are actually
clobbered and need to be saved, we could improve performance of prolog
and epilog code by saving/restoring only selected CR fields.  However,
this is not possible in the current ABI since there is no way to
describe this fact in the CFI.

With the ELFv2 ABI, every CR field gets its own CFI record (using the
register numbers 68 .. 75 to stand for CR0 .. CR7).  Now, those fields
will still usually be saved in the same 4-byte field on the stack.
The semantics of a CFI record for field CRx is that the memory location
holds 4 bytes, and the 4-bit nibble corresponding to CRx within those
4 bytes holds the CRx value to be restored.

The one problem with this scheme is the way uw_install_context tries
to modify saved values when unwinding the stack: it will simply copy
over the whole field into the save slot of the unwinder routine
(that calls __builtin_eh_return).  This clearly does not work if
multiple CR fields need to be restored independently.

To fix this, the prolog/epilog code for unwinder routines will use
*multiple* stack slots, one for each call-saved CR field, and save
and restore those fields to and from their own slot.  This will
allow uw_install_context to install values for multiple fields.
(Note that there is already precedent for unwinder routines being
treated specially in the rs6000.c prologue/epilogue code ...)

Bye,
Ulrich


gcc/ChangeLog:

2013-11-11  Ulrich Weigand  <Ulrich.Weigand@de.ibm.com>

	* config/rs6000/rs6000.c (struct rs6000_stack): New member ehcr_offset.
	(rs6000_stack_info): For ABI_ELFv2, allocate space for separate CR
	field save areas if the function calls __builtin_eh_return.
	(rs6000_emit_move_from_cr): New function.
	(rs6000_emit_prologue): Use it.  For ABI_ELFv2, generate separate
	CFI records for each saved CR field.  For functions that call
	__builtin_eh_return, save all CR fields into separate slots.
	(restore_saved_cr): For ABI_ELFv2, generate separate CFA_RESTORE
	entries for each saved CR field.
	(add_crlr_cfa_restore): Likewise.
	(rs6000_emit_epilogue): For ABI_ELFv2, if the function calls
	__builtin_eh_return, restore each CR field from its own slot.

libgcc/ChangeLog:

2013-11-11  Ulrich Weigand  <Ulrich.Weigand@de.ibm.com>

	* config/rs6000/linux-unwind.h (R_CR3, R_CR4): New macros.
	(ppc_fallback_frame_state) [_CALL_ELF == 2]: Create CFI entry
	for CR3 and CR4.


Index: gcc/gcc/config/rs6000/rs6000.c
===================================================================
--- gcc.orig/gcc/config/rs6000/rs6000.c
+++ gcc/gcc/config/rs6000/rs6000.c
@@ -97,6 +97,7 @@ typedef struct rs6000_stack {
   int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
   int varargs_save_offset;	/* offset to save the varargs registers */
   int ehrd_offset;		/* offset to EH return data */
+  int ehcr_offset;		/* offset to EH CR field data */
   int reg_size;			/* register size (4 or 8) */
   HOST_WIDE_INT vars_size;	/* variable save area size */
   int parm_size;		/* outgoing parameter size */
@@ -19847,6 +19848,7 @@ rs6000_stack_info (void)
   rs6000_stack_t *info_ptr = &stack_info;
   int reg_size = TARGET_32BIT ? 4 : 8;
   int ehrd_size;
+  int ehcr_size;
   int save_align;
   int first_gp;
   HOST_WIDE_INT non_fixed_size;
@@ -19940,6 +19942,18 @@ rs6000_stack_info (void)
   else
     ehrd_size = 0;
 
+  /* In the ELFv2 ABI, we also need to allocate space for separate
+     CR field save areas if the function calls __builtin_eh_return.  */
+  if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+    {
+      /* This hard-codes that we have three call-saved CR fields.  */
+      ehcr_size = 3 * reg_size;
+      /* We do *not* use the regular CR save mechanism.  */
+      info_ptr->cr_save_p = 0;
+    }
+  else
+    ehcr_size = 0;
+
   /* Determine various sizes.  */
   info_ptr->reg_size     = reg_size;
   info_ptr->fixed_size   = RS6000_SAVE_AREA;
@@ -20009,6 +20023,8 @@ rs6000_stack_info (void)
 	}
       else
 	info_ptr->ehrd_offset      = info_ptr->gp_save_offset - ehrd_size;
+
+      info_ptr->ehcr_offset      = info_ptr->ehrd_offset - ehcr_size;
       info_ptr->cr_save_offset   = reg_size; /* first word when 64-bit.  */
       info_ptr->lr_save_offset   = 2*reg_size;
       break;
@@ -20071,6 +20087,7 @@ rs6000_stack_info (void)
 					 + info_ptr->spe_gp_size
 					 + info_ptr->spe_padding_size
 					 + ehrd_size
+					 + ehcr_size
 					 + info_ptr->cr_size
 					 + info_ptr->vrsave_size,
 					 save_align);
@@ -21522,6 +21539,43 @@ rs6000_emit_savres_rtx (rs6000_stack_t *
   return insn;
 }
 
+/* Emit code to store CR fields that need to be saved into REG.  */
+
+static void
+rs6000_emit_move_from_cr (rtx reg)
+{
+  /* Only the ELFv2 ABI allows storing only selected fields.  */
+  if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
+    {
+      int i, cr_reg[8], count = 0;
+
+      /* Collect CR fields that must be saved.  */
+      for (i = 0; i < 8; i++)
+	if (save_reg_p (CR0_REGNO + i))
+	  cr_reg[count++] = i;
+
+      /* If it's just a single one, use mfcrf.  */
+      if (count == 1)
+	{
+	  rtvec p = rtvec_alloc (1);
+	  rtvec r = rtvec_alloc (2);
+	  RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
+	  RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
+	  RTVEC_ELT (p, 0)
+	    = gen_rtx_SET (VOIDmode, reg,
+			   gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
+
+	  emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+	  return;
+	}
+
+      /* ??? It might be better to handle count == 2 / 3 cases here
+	 as well, using logical operations to combine the values.  */
+    }
+
+  emit_insn (gen_movesi_from_cr (reg));
+}
+
 /* Determine whether the gp REG is really used.  */
 
 static bool
@@ -21826,7 +21880,7 @@ rs6000_emit_prologue (void)
     {
       cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
       START_USE (cr_save_regno);
-      emit_insn (gen_movesi_from_cr (cr_save_rtx));
+      rs6000_emit_move_from_cr (cr_save_rtx);
     }
 
   /* Do any required saving of fpr's.  If only one or two to save, do
@@ -22143,7 +22197,7 @@ rs6000_emit_prologue (void)
 	{
 	  START_USE (0);
 	  cr_save_rtx = gen_rtx_REG (SImode, 0);
-	  emit_insn (gen_movesi_from_cr (cr_save_rtx));
+	  rs6000_emit_move_from_cr (cr_save_rtx);
 	}
 
       /* Saving CR requires a two-instruction sequence: one instruction
@@ -22187,12 +22241,71 @@ rs6000_emit_prologue (void)
 			   GEN_INT (info->cr_save_offset + sp_off));
       mem = gen_frame_mem (SImode, addr);
 
-      /* We still cannot express that multiple CR fields are saved in the
-	 CR save slot.  By convention, we use a single CR regnum to represent
-	 the fact that all call-saved CR fields are saved.  We use CR2_REGNO
-	 to be compatible with gcc-2.95 on Linux.  */
-      rtx set = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, CR2_REGNO));
-      add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+      if (DEFAULT_ABI == ABI_ELFv2)
+	{
+	  /* In the ELFv2 ABI we generate separate CFI records for each
+	     CR field that was actually saved.  They all point to the
+	     same 32-bit stack slot.  */
+	  rtx crframe[8];
+	  int n_crframe = 0;
+
+	  for (i = 0; i < 8; i++)
+	    if (save_reg_p (CR0_REGNO + i))
+	      {
+		crframe[n_crframe]
+		  = gen_rtx_SET (VOIDmode, mem,
+				 gen_rtx_REG (SImode, CR0_REGNO + i));
+
+		RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
+		n_crframe++;
+	     }
+
+	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			gen_rtx_PARALLEL (VOIDmode,
+					  gen_rtvec_v (n_crframe, crframe)));
+	}
+      else
+	{
+	  /* In other ABIs, by convention, we use a single CR regnum to
+	     represent the fact that all call-saved CR fields are saved.
+	     We use CR2_REGNO to be compatible with gcc-2.95 on Linux.  */
+	  rtx set = gen_rtx_SET (VOIDmode, mem,
+				 gen_rtx_REG (SImode, CR2_REGNO));
+	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+	}
+    }
+
+  /* In the ELFv2 ABI we need to save all call-saved CR fields into
+     *separate* slots if the routine calls __builtin_eh_return, so
+     that they can be independently restored by the unwinder.  */
+  if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+    {
+      int i, cr_off = info->ehcr_offset;
+      rtx crsave;
+
+      /* ??? We might get better performance by using multiple mfocrf
+	 instructions.  */
+      crsave = gen_rtx_REG (SImode, 0);
+      emit_insn (gen_movesi_from_cr (crsave));
+
+      for (i = 0; i < 8; i++)
+	if (!call_used_regs[CR0_REGNO + i])
+	  {
+	    rtvec p = rtvec_alloc (2);
+	    RTVEC_ELT (p, 0)
+	      = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
+	    RTVEC_ELT (p, 1)
+	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
+
+	    insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+	    RTX_FRAME_RELATED_P (insn) = 1;
+	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			  gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
+					   sp_reg_rtx, cr_off + sp_off));
+
+	    cr_off += reg_size;
+	  }
     }
 
   /* Update stack and set back pointer unless this is V.4,
@@ -22565,6 +22678,7 @@ restore_saved_cr (rtx reg, int using_mfc
 
   if (using_mfcr_multiple && count > 1)
     {
+      rtx insn;
       rtvec p;
       int ndx;
 
@@ -22582,16 +22696,43 @@ restore_saved_cr (rtx reg, int using_mfc
 			   gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
 	    ndx++;
 	  }
-      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
       gcc_assert (ndx == count);
+
+      /* For the ELFv2 ABI we generate a CFA_RESTORE for each
+	 CR field separately.  */
+      if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
+	{
+	  for (i = 0; i < 8; i++)
+	    if (save_reg_p (CR0_REGNO + i))
+	      add_reg_note (insn, REG_CFA_RESTORE,
+			    gen_rtx_REG (SImode, CR0_REGNO + i));
+
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	}
     }
   else
     for (i = 0; i < 8; i++)
       if (save_reg_p (CR0_REGNO + i))
-	emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i),
-					reg));
+	{
+	  rtx insn = emit_insn (gen_movsi_to_cr_one
+				 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
 
-  if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
+	  /* For the ELFv2 ABI we generate a CFA_RESTORE for each
+	     CR field separately, attached to the insn that in fact
+	     restores this particular CR field.  */
+	  if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
+	    {
+	      add_reg_note (insn, REG_CFA_RESTORE,
+			    gen_rtx_REG (SImode, CR0_REGNO + i));
+
+	      RTX_FRAME_RELATED_P (insn) = 1;
+	    }
+	}
+
+  /* For other ABIs, we just generate a single CFA_RESTORE for CR2.  */
+  if (!exit_func && DEFAULT_ABI != ABI_ELFv2
+      && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
     {
       rtx insn = get_last_insn ();
       rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
@@ -22632,10 +22773,22 @@ restore_saved_lr (int regno, bool exit_f
 static rtx
 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
 {
-  if (info->cr_save_p)
+  if (DEFAULT_ABI == ABI_ELFv2)
+    {
+      int i;
+      for (i = 0; i < 8; i++)
+	if (save_reg_p (CR0_REGNO + i))
+	  {
+	    rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
+	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
+					   cfa_restores);
+	  }
+    }
+  else if (info->cr_save_p)
     cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
 				   gen_rtx_REG (SImode, CR2_REGNO),
 				   cfa_restores);
+
   if (info->lr_save_p)
     cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
 				   gen_rtx_REG (Pmode, LR_REGNO),
@@ -23133,6 +23286,35 @@ rs6000_emit_epilogue (int sibcall)
 	       || (!restoring_GPRs_inline
 		   && info->first_fp_reg_save == 64));
 
+  /* In the ELFv2 ABI we need to restore all call-saved CR fields from
+     *separate* slots if the routine calls __builtin_eh_return, so
+     that they can be independently restored by the unwinder.  */
+  if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+    {
+      int i, cr_off = info->ehcr_offset;
+
+      for (i = 0; i < 8; i++)
+	if (!call_used_regs[CR0_REGNO + i])
+	  {
+	    rtx reg = gen_rtx_REG (SImode, 0);
+	    emit_insn (gen_frame_load (reg, frame_reg_rtx,
+				       cr_off + frame_off));
+
+	    insn = emit_insn (gen_movsi_to_cr_one
+				(gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
+
+	    if (!exit_func && flag_shrink_wrap)
+	      {
+		add_reg_note (insn, REG_CFA_RESTORE,
+			      gen_rtx_REG (SImode, CR0_REGNO + i));
+
+		RTX_FRAME_RELATED_P (insn) = 1;
+	      }
+
+	    cr_off += reg_size;
+	  }
+    }
+
   /* Get the old lr if we saved it.  If we are restoring registers
      out-of-line, then the out-of-line routines can do this for us.  */
   if (restore_lr && restoring_GPRs_inline)
Index: gcc/libgcc/config/rs6000/linux-unwind.h
===================================================================
--- gcc.orig/libgcc/config/rs6000/linux-unwind.h
+++ gcc/libgcc/config/rs6000/linux-unwind.h
@@ -24,6 +24,8 @@
 
 #define R_LR		65
 #define R_CR2		70
+#define R_CR3		71
+#define R_CR4		72
 #define R_VR0		77
 #define R_VRSAVE	109
 
@@ -215,8 +217,16 @@ ppc_fallback_frame_state (struct _Unwind
 #ifndef __LITTLE_ENDIAN__
   cr_offset += sizeof (long) - 4;
 #endif
+  /* In the ELFv1 ABI, CR2 stands in for the whole CR.  */
   fs->regs.reg[R_CR2].how = REG_SAVED_OFFSET;
   fs->regs.reg[R_CR2].loc.offset = cr_offset;
+#if _CALL_ELF == 2
+  /* In the ELFv2 ABI, every CR field has a separate CFI entry.  */
+  fs->regs.reg[R_CR3].how = REG_SAVED_OFFSET;
+  fs->regs.reg[R_CR3].loc.offset = cr_offset;
+  fs->regs.reg[R_CR4].how = REG_SAVED_OFFSET;
+  fs->regs.reg[R_CR4].loc.offset = cr_offset;
+#endif
 
   fs->regs.reg[R_LR].how = REG_SAVED_OFFSET;
   fs->regs.reg[R_LR].loc.offset = (long) &regs->link - new_cfa;
-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  Ulrich.Weigand@de.ibm.com

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2013-11-11 14:43 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-11-11 14:44 [PATCH, rs6000] ELFv2 ABI 3/8: Track single CR fields in DWARF CFI Ulrich Weigand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).