public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 0/3] [amdgcn] Improve debug support.
@ 2021-06-22 17:14 Hafiz Abid Qadeer
  2021-06-22 17:14 ` [PATCH 1/3] [amdgcn] Update CFI configuration Hafiz Abid Qadeer
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Hafiz Abid Qadeer @ 2021-06-22 17:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: ams, abidh

This patch series improves debug experience with ROCGDB.  It enables
generation of CFI, makes use of frame pointer in CFA calculations and
uses proposed values of for address spaces in the debug information.

Tested amdgcn-amdhsa target with no regression.  Debugger testing was
done manually. 

Hafiz Abid Qadeer (3):
  [amdgcn] Update CFI configuration
  [amdgcn] Use frame pointer for CFA expressions.
  [amdgcn] Add hook for DWARF address spaces.

 gcc/common/config/gcn/gcn-common.c |   2 +-
 gcc/config/gcn/gcn.c               | 171 +++++++++++++++++++++++++----
 gcc/config/gcn/gcn.h               |  10 +-
 3 files changed, 157 insertions(+), 26 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] [amdgcn] Update CFI configuration
  2021-06-22 17:14 [PATCH 0/3] [amdgcn] Improve debug support Hafiz Abid Qadeer
@ 2021-06-22 17:14 ` Hafiz Abid Qadeer
  2021-06-23  9:32   ` Andrew Stubbs
  2021-06-22 17:14 ` [PATCH 2/3] [amdgcn] Use frame pointer for CFA expressions Hafiz Abid Qadeer
  2021-06-22 17:14 ` [PATCH 3/3] [amdgcn] Add hook for DWARF address spaces Hafiz Abid Qadeer
  2 siblings, 1 reply; 7+ messages in thread
From: Hafiz Abid Qadeer @ 2021-06-22 17:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: ams, abidh

Currently we don't get any call frame information for the amdgcn target.
This patch makes necessary adjustments to generate CFI that can work with
ROCGDB (ROCm 3.8+).

gcc/

	* config/gcn/gcn.c (move_callee_saved_registers): Emit CFI notes for
	prologue register saves.
	(gcn_debug_unwind_info): Use UI_DWARF2.
	(gcn_dwarf_register_number): Map DWARF_LINK_REGISTER to DWARF PC.
	(gcn_dwarf_register_span): DWARF_LINK_REGISTER doesn't span.
	* config/gcn/gcn.h: (DWARF_FRAME_RETURN_COLUMN): New define.
	(DWARF_LINK_REGISTER): New define.
	(FIRST_PSEUDO_REGISTER): Increment.
	(FIXED_REGISTERS): Add entry for DWARF_LINK_REGISTER.
	(CALL_USED_REGISTERS): Likewise.
	(REGISTER_NAMES): Likewise.
---
 gcc/config/gcn/gcn.c | 82 ++++++++++++++++++++++++++++++++++++++++----
 gcc/config/gcn/gcn.h | 10 +++---
 2 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 283a91fe50a..3ab16548aad 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -2649,6 +2649,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
   rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
   HOST_WIDE_INT exec_set = 0;
   int offreg_set = 0;
+  auto_vec<int> saved_sgprs;
 
   start_sequence ();
 
@@ -2665,7 +2666,10 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
 	int lane = saved_scalars % 64;
 
 	if (prologue)
-	  emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
+	  {
+	    emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
+	    saved_sgprs.safe_push (regno);
+	  }
 	else
 	  emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
 
@@ -2698,7 +2702,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
 				  gcn_gen_undef (V64SImode), exec));
 
   /* Move vectors.  */
-  for (regno = FIRST_VGPR_REG, offset = offsets->pretend_size;
+  for (regno = FIRST_VGPR_REG, offset = 0;
        regno < FIRST_PSEUDO_REGISTER; regno++)
     if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
 	|| (regno == VGPR_REGNO (6) && saved_scalars > 0)
@@ -2719,8 +2723,67 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
 	  }
 
 	if (prologue)
-	  emit_insn (gen_scatterv64si_insn_1offset_exec (vsp, const0_rtx, reg,
-							 as, const0_rtx, exec));
+	  {
+	    rtx insn = emit_insn (gen_scatterv64si_insn_1offset_exec
+				  (vsp, const0_rtx, reg, as, const0_rtx,
+				   exec));
+
+	    /* Add CFI metadata.  */
+	    rtx note;
+	    if (regno == VGPR_REGNO (6) || regno == VGPR_REGNO (7))
+	      {
+		int start = (regno == VGPR_REGNO (7) ? 64 : 0);
+		int count = MIN (saved_scalars - start, 64);
+		int add_lr = (regno == VGPR_REGNO (6)
+			      && df_regs_ever_live_p (LINK_REGNUM));
+		int lrdest = -1;
+		rtvec seq = rtvec_alloc (count + add_lr);
+
+		/* Add an REG_FRAME_RELATED_EXPR entry for each scalar
+		   register that was saved in this batch.  */
+		for (int idx = 0; idx < count; idx++)
+		  {
+		    int stackaddr = offset + idx * 4;
+		    rtx dest = gen_rtx_MEM (SImode,
+					    gen_rtx_PLUS
+					    (DImode, sp,
+					     GEN_INT (stackaddr)));
+		    rtx src = gen_rtx_REG (SImode, saved_sgprs[start + idx]);
+		    rtx set = gen_rtx_SET (dest, src);
+		    RTX_FRAME_RELATED_P (set) = 1;
+		    RTVEC_ELT (seq, idx) = set;
+
+		    if (saved_sgprs[start + idx] == LINK_REGNUM)
+		      lrdest = stackaddr;
+		  }
+
+		/* Add an additional expression for DWARF_LINK_REGISTER if
+		   LINK_REGNUM was saved.  */
+		if (lrdest != -1)
+		  {
+		    rtx dest = gen_rtx_MEM (DImode,
+					    gen_rtx_PLUS
+					    (DImode, sp,
+					     GEN_INT (lrdest)));
+		    rtx src = gen_rtx_REG (DImode, DWARF_LINK_REGISTER);
+		    rtx set = gen_rtx_SET (dest, src);
+		    RTX_FRAME_RELATED_P (set) = 1;
+		    RTVEC_ELT (seq, count) = set;
+		  }
+
+		note = gen_rtx_SEQUENCE (VOIDmode, seq);
+	      }
+	    else
+	      {
+		rtx dest = gen_rtx_MEM (V64SImode,
+					gen_rtx_PLUS (DImode, sp,
+						      GEN_INT (offset)));
+		rtx src = gen_rtx_REG (V64SImode, regno);
+		note = gen_rtx_SET (dest, src);
+	      }
+	    RTX_FRAME_RELATED_P (insn) = 1;
+	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
+	  }
 	else
 	  emit_insn (gen_gatherv64si_insn_1offset_exec
 		     (reg, vsp, const0_rtx, as, const0_rtx,
@@ -3224,8 +3287,7 @@ gcn_cannot_copy_insn_p (rtx_insn *insn)
 static enum unwind_info_type
 gcn_debug_unwind_info ()
 {
-  /* No support for debug info, yet.  */
-  return UI_NONE;
+  return UI_DWARF2;
 }
 
 /* Determine if there is a suitable hardware conversion instruction.
@@ -6214,6 +6276,8 @@ gcn_dwarf_register_number (unsigned int regno)
     return 768;  */
   else if (regno == SCC_REG)
     return 128;
+  else if (regno == DWARF_LINK_REGISTER)
+    return 16;
   else if (SGPR_REGNO_P (regno))
     {
       if (regno - FIRST_SGPR_REG < 64)
@@ -6243,8 +6307,12 @@ gcn_dwarf_register_span (rtx rtl)
   if (GET_MODE_SIZE (mode) != 8)
     return NULL_RTX;
 
-  rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
   unsigned regno = REGNO (rtl);
+
+  if (regno == DWARF_LINK_REGISTER)
+    return NULL_RTX;
+
+  rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
 
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index eba4646f1bf..4992a4c02ef 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -85,6 +85,7 @@
 #define FIRST_PARM_OFFSET(FNDECL)    0
 #define DYNAMIC_CHAIN_ADDRESS(FP)    plus_constant (Pmode, (FP), -16)
 #define INCOMING_RETURN_ADDR_RTX     gen_rtx_REG (Pmode, LINK_REGNUM)
+#define DWARF_FRAME_RETURN_COLUMN    16
 #define STACK_DYNAMIC_OFFSET(FNDECL) (-crtl->outgoing_args_size)
 #define ACCUMULATE_OUTGOING_ARGS     1
 #define RETURN_ADDR_RTX(COUNT,FRAMEADDR) \
@@ -135,7 +136,8 @@
 #define WORK_ITEM_ID_Z_REG	  162
 #define SOFT_ARG_REG		  416
 #define FRAME_POINTER_REGNUM	  418
-#define FIRST_PSEUDO_REGISTER	  420
+#define DWARF_LINK_REGISTER	  420
+#define FIRST_PSEUDO_REGISTER	  421
 
 #define FIRST_PARM_REG 24
 #define NUM_PARM_REGS  6
@@ -197,7 +199,7 @@
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     /* Other registers.  */			    \
-    1, 1, 1, 1					    \
+    1, 1, 1, 1, 1				    \
 }
 
 #define CALL_USED_REGISTERS {			    \
@@ -235,7 +237,7 @@
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     /* Other registers.  */			    \
-    1, 1, 1, 1					    \
+    1, 1, 1, 1, 1				    \
 }
 
 \f
@@ -514,7 +516,7 @@ enum gcn_address_spaces
     "v236", "v237", "v238", "v239", "v240", "v241", "v242", "v243", "v244", \
     "v245", "v246", "v247", "v248", "v249", "v250", "v251", "v252", "v253", \
     "v254", "v255",							    \
-    "?ap0", "?ap1", "?fp0", "?fp1" }
+    "?ap0", "?ap1", "?fp0", "?fp1", "?dwlr" }
 
 #define PRINT_OPERAND(FILE, X, CODE)  print_operand(FILE, X, CODE)
 #define PRINT_OPERAND_ADDRESS(FILE, ADDR)  print_operand_address (FILE, ADDR)
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/3] [amdgcn] Use frame pointer for CFA expressions.
  2021-06-22 17:14 [PATCH 0/3] [amdgcn] Improve debug support Hafiz Abid Qadeer
  2021-06-22 17:14 ` [PATCH 1/3] [amdgcn] Update CFI configuration Hafiz Abid Qadeer
@ 2021-06-22 17:14 ` Hafiz Abid Qadeer
  2021-06-23  9:32   ` Andrew Stubbs
  2021-06-22 17:14 ` [PATCH 3/3] [amdgcn] Add hook for DWARF address spaces Hafiz Abid Qadeer
  2 siblings, 1 reply; 7+ messages in thread
From: Hafiz Abid Qadeer @ 2021-06-22 17:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: ams, abidh

As size of address is bigger than registers in amdgcn, we are forced to use
DW_CFA_def_cfa_expression to make an expression that concatenates multiple
registers for the value of the CFA.  This then prohibits us from using many
of the dwarf ops which expect CFA rule to be a single regsiter plus an offset.
Using frame pointer in the CFA rule is only real possibility as it is saved
in every frame and it is easy to unwind its value.

So unless user gives fomit-frame-pointer, we use frame pointer for the
cfi information.  This options also has a different default now.

gcc/

	* common/config/gcn/gcn-common.c
	(gcn_option_optimization_table): Change OPT_fomit_frame_pointer to -O3.
	(gcn_expand_prologue): Prefer the frame pointer when emitting CFI.
	(gcn_frame_pointer_rqd): New function.
	(TARGET_FRAME_POINTER_REQUIRED): New hook.
---
 gcc/common/config/gcn/gcn-common.c |  2 +-
 gcc/config/gcn/gcn.c               | 60 +++++++++++++++++++++++-------
 2 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/gcc/common/config/gcn/gcn-common.c b/gcc/common/config/gcn/gcn-common.c
index 305c310f940..695eb467e34 100644
--- a/gcc/common/config/gcn/gcn-common.c
+++ b/gcc/common/config/gcn/gcn-common.c
@@ -27,7 +27,7 @@
 /* Set default optimization options.  */
 static const struct default_options gcn_option_optimization_table[] =
   {
-    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+    { OPT_LEVELS_3_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 3ab16548aad..0eac3aa3844 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -2900,10 +2900,14 @@ gcn_expand_prologue ()
 	  rtx adjustment = gen_int_mode (sp_adjust, SImode);
 	  rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
 							 adjustment, scc));
-	  RTX_FRAME_RELATED_P (insn) = 1;
-	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-			gen_rtx_SET (sp,
-				     gen_rtx_PLUS (DImode, sp, adjustment)));
+	  if (!offsets->need_frame_pointer)
+	    {
+	      RTX_FRAME_RELATED_P (insn) = 1;
+	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+			    gen_rtx_SET (sp,
+					 gen_rtx_PLUS (DImode, sp,
+						       adjustment)));
+	    }
 	  emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
 	}
 
@@ -2917,25 +2921,24 @@ gcn_expand_prologue ()
 	  rtx adjustment = gen_int_mode (fp_adjust, SImode);
 	  rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo,
 							adjustment, scc));
-	  RTX_FRAME_RELATED_P (insn) = 1;
-	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
-			gen_rtx_SET (fp,
-				     gen_rtx_PLUS (DImode, sp, adjustment)));
 	  emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
 					 (fp_adjust < 0 ? GEN_INT (-1)
 					  : const0_rtx),
 					 scc, scc));
+
+	  /* Set the CFA to the entry stack address, as an offset from the
+	     frame pointer.  This is preferred because the frame pointer is
+	     saved in each frame, whereas the stack pointer is not.  */
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  add_reg_note (insn, REG_CFA_DEF_CFA,
+			gen_rtx_PLUS (DImode, fp,
+				      GEN_INT (-(offsets->pretend_size
+						 + offsets->callee_saves))));
 	}
 
       rtx_insn *seq = get_insns ();
       end_sequence ();
 
-      /* FIXME: Prologue insns should have this flag set for debug output, etc.
-	 but it causes issues for now.
-      for (insn = seq; insn; insn = NEXT_INSN (insn))
-        if (INSN_P (insn))
-	  RTX_FRAME_RELATED_P (insn) = 1;*/
-
       emit_insn (seq);
     }
   else
@@ -3011,6 +3014,16 @@ gcn_expand_prologue ()
 		    gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
 						   dbg_adjustment)));
 
+      if (offsets->need_frame_pointer)
+	{
+	  /* Set the CFA to the entry stack address, as an offset from the
+	     frame pointer.  This is necessary when alloca is used, and
+	     harmless otherwise.  */
+	  rtx neg_adjust = gen_int_mode (-offsets->callee_saves, DImode);
+	  add_reg_note (insn, REG_CFA_DEF_CFA,
+			gen_rtx_PLUS (DImode, fp, neg_adjust));
+	}
+
       /* Make sure the flat scratch reg doesn't get optimised away.  */
       emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
     }
@@ -3114,6 +3127,23 @@ gcn_expand_epilogue (void)
   emit_jump_insn (gen_gcn_return ());
 }
 
+/* Implement TARGET_FRAME_POINTER_REQUIRED.
+
+   Return true if the frame pointer should not be eliminated.  */
+
+bool
+gcn_frame_pointer_rqd (void)
+{
+  /* GDB needs the frame pointer in order to unwind properly,
+     but that's not important for the entry point, unless alloca is used.
+     It's not important for code execution, so we should repect the
+     -fomit-frame-pointer flag.  */
+  return (!flag_omit_frame_pointer
+	  && cfun
+	  && (cfun->calls_alloca
+	      || (cfun->machine && cfun->machine->normal_function)));
+}
+
 /* Implement TARGET_CAN_ELIMINATE.
  
    Return true if the compiler is allowed to try to replace register number
@@ -6373,6 +6403,8 @@ gcn_dwarf_register_span (rtx rtl)
 #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
 #undef  TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN gcn_expand_builtin
+#undef  TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED gcn_frame_pointer_rqd
 #undef  TARGET_FUNCTION_ARG
 #undef  TARGET_FUNCTION_ARG_ADVANCE
 #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/3] [amdgcn] Add hook for DWARF address spaces.
  2021-06-22 17:14 [PATCH 0/3] [amdgcn] Improve debug support Hafiz Abid Qadeer
  2021-06-22 17:14 ` [PATCH 1/3] [amdgcn] Update CFI configuration Hafiz Abid Qadeer
  2021-06-22 17:14 ` [PATCH 2/3] [amdgcn] Use frame pointer for CFA expressions Hafiz Abid Qadeer
@ 2021-06-22 17:14 ` Hafiz Abid Qadeer
  2021-06-23  9:32   ` Andrew Stubbs
  2 siblings, 1 reply; 7+ messages in thread
From: Hafiz Abid Qadeer @ 2021-06-22 17:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: ams, abidh

Map GCN address spaces to the proposed DWARF address spaces defined by AMD at
https://llvm.org/docs/AMDGPUUsage.html#amdgpu-dwarf-address-class-mapping-table

gcc/

	* config/gcn/gcn.c: Include dwarf2.h.
	(gcn_addr_space_debug): New function.
	(TARGET_ADDR_SPACE_DEBUG): New hook.
---
 gcc/config/gcn/gcn.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 0eac3aa3844..25996dc83de 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -50,6 +50,7 @@
 #include "varasm.h"
 #include "intl.h"
 #include "rtl-iter.h"
+#include "dwarf2.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -1497,6 +1498,32 @@ gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
     gcc_unreachable ();
 }
 
+/* Implement TARGET_ADDR_SPACE_DEBUG.
+
+   Return the dwarf address space class for each hardware address space.  */
+
+static int
+gcn_addr_space_debug (addr_space_t as)
+{
+  switch (as)
+    {
+      case ADDR_SPACE_DEFAULT:
+      case ADDR_SPACE_FLAT:
+      case ADDR_SPACE_SCALAR_FLAT:
+      case ADDR_SPACE_FLAT_SCRATCH:
+	return DW_ADDR_none;
+      case ADDR_SPACE_GLOBAL:
+	return 1;      // DW_ADDR_LLVM_global
+      case ADDR_SPACE_LDS:
+	return 3;      // DW_ADDR_LLVM_group
+      case ADDR_SPACE_SCRATCH:
+	return 4;      // DW_ADDR_LLVM_private
+      case ADDR_SPACE_GDS:
+	return 0x8000; // DW_ADDR_AMDGPU_region
+    }
+  gcc_unreachable ();
+}
+
 
 /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
    
@@ -6354,6 +6381,8 @@ gcn_dwarf_register_span (rtx rtl)
 
 #undef  TARGET_ADDR_SPACE_ADDRESS_MODE
 #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
+#undef  TARGET_ADDR_SPACE_DEBUG
+#define TARGET_ADDR_SPACE_DEBUG gcn_addr_space_debug
 #undef  TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
   gcn_addr_space_legitimate_address_p
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] [amdgcn] Update CFI configuration
  2021-06-22 17:14 ` [PATCH 1/3] [amdgcn] Update CFI configuration Hafiz Abid Qadeer
@ 2021-06-23  9:32   ` Andrew Stubbs
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Stubbs @ 2021-06-23  9:32 UTC (permalink / raw)
  To: Hafiz Abid Qadeer, gcc-patches

On 22/06/2021 18:14, Hafiz Abid Qadeer wrote:
> Currently we don't get any call frame information for the amdgcn target.
> This patch makes necessary adjustments to generate CFI that can work with
> ROCGDB (ROCm 3.8+).
> 
> gcc/
> 
> 	* config/gcn/gcn.c (move_callee_saved_registers): Emit CFI notes for
> 	prologue register saves.
> 	(gcn_debug_unwind_info): Use UI_DWARF2.
> 	(gcn_dwarf_register_number): Map DWARF_LINK_REGISTER to DWARF PC.
> 	(gcn_dwarf_register_span): DWARF_LINK_REGISTER doesn't span.
> 	* config/gcn/gcn.h: (DWARF_FRAME_RETURN_COLUMN): New define.
> 	(DWARF_LINK_REGISTER): New define.
> 	(FIRST_PSEUDO_REGISTER): Increment.
> 	(FIXED_REGISTERS): Add entry for DWARF_LINK_REGISTER.
> 	(CALL_USED_REGISTERS): Likewise.
> 	(REGISTER_NAMES): Likewise.
> ---
>   gcc/config/gcn/gcn.c | 82 ++++++++++++++++++++++++++++++++++++++++----
>   gcc/config/gcn/gcn.h | 10 +++---
>   2 files changed, 81 insertions(+), 11 deletions(-)
> 
> diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
> index 283a91fe50a..3ab16548aad 100644
> --- a/gcc/config/gcn/gcn.c
> +++ b/gcc/config/gcn/gcn.c
> @@ -2649,6 +2649,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
>     rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
>     HOST_WIDE_INT exec_set = 0;
>     int offreg_set = 0;
> +  auto_vec<int> saved_sgprs;
>   
>     start_sequence ();
>   
> @@ -2665,7 +2666,10 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
>   	int lane = saved_scalars % 64;
>   
>   	if (prologue)
> -	  emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
> +	  {
> +	    emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
> +	    saved_sgprs.safe_push (regno);
> +	  }
>   	else
>   	  emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
>   
> @@ -2698,7 +2702,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
>   				  gcn_gen_undef (V64SImode), exec));
>   
>     /* Move vectors.  */
> -  for (regno = FIRST_VGPR_REG, offset = offsets->pretend_size;
> +  for (regno = FIRST_VGPR_REG, offset = 0;
>          regno < FIRST_PSEUDO_REGISTER; regno++)
>       if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
>   	|| (regno == VGPR_REGNO (6) && saved_scalars > 0)
> @@ -2719,8 +2723,67 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
>   	  }
>   
>   	if (prologue)
> -	  emit_insn (gen_scatterv64si_insn_1offset_exec (vsp, const0_rtx, reg,
> -							 as, const0_rtx, exec));
> +	  {
> +	    rtx insn = emit_insn (gen_scatterv64si_insn_1offset_exec
> +				  (vsp, const0_rtx, reg, as, const0_rtx,
> +				   exec));
> +
> +	    /* Add CFI metadata.  */
> +	    rtx note;
> +	    if (regno == VGPR_REGNO (6) || regno == VGPR_REGNO (7))
> +	      {
> +		int start = (regno == VGPR_REGNO (7) ? 64 : 0);
> +		int count = MIN (saved_scalars - start, 64);
> +		int add_lr = (regno == VGPR_REGNO (6)
> +			      && df_regs_ever_live_p (LINK_REGNUM));
> +		int lrdest = -1;
> +		rtvec seq = rtvec_alloc (count + add_lr);
> +
> +		/* Add an REG_FRAME_RELATED_EXPR entry for each scalar
> +		   register that was saved in this batch.  */
> +		for (int idx = 0; idx < count; idx++)
> +		  {
> +		    int stackaddr = offset + idx * 4;
> +		    rtx dest = gen_rtx_MEM (SImode,
> +					    gen_rtx_PLUS
> +					    (DImode, sp,
> +					     GEN_INT (stackaddr)));
> +		    rtx src = gen_rtx_REG (SImode, saved_sgprs[start + idx]);
> +		    rtx set = gen_rtx_SET (dest, src);
> +		    RTX_FRAME_RELATED_P (set) = 1;
> +		    RTVEC_ELT (seq, idx) = set;
> +
> +		    if (saved_sgprs[start + idx] == LINK_REGNUM)
> +		      lrdest = stackaddr;
> +		  }
> +
> +		/* Add an additional expression for DWARF_LINK_REGISTER if
> +		   LINK_REGNUM was saved.  */
> +		if (lrdest != -1)
> +		  {
> +		    rtx dest = gen_rtx_MEM (DImode,
> +					    gen_rtx_PLUS
> +					    (DImode, sp,
> +					     GEN_INT (lrdest)));
> +		    rtx src = gen_rtx_REG (DImode, DWARF_LINK_REGISTER);
> +		    rtx set = gen_rtx_SET (dest, src);
> +		    RTX_FRAME_RELATED_P (set) = 1;
> +		    RTVEC_ELT (seq, count) = set;
> +		  }
> +
> +		note = gen_rtx_SEQUENCE (VOIDmode, seq);
> +	      }
> +	    else
> +	      {
> +		rtx dest = gen_rtx_MEM (V64SImode,
> +					gen_rtx_PLUS (DImode, sp,
> +						      GEN_INT (offset)));
> +		rtx src = gen_rtx_REG (V64SImode, regno);
> +		note = gen_rtx_SET (dest, src);
> +	      }
> +	    RTX_FRAME_RELATED_P (insn) = 1;
> +	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
> +	  }
>   	else
>   	  emit_insn (gen_gatherv64si_insn_1offset_exec
>   		     (reg, vsp, const0_rtx, as, const0_rtx,
> @@ -3224,8 +3287,7 @@ gcn_cannot_copy_insn_p (rtx_insn *insn)
>   static enum unwind_info_type
>   gcn_debug_unwind_info ()
>   {
> -  /* No support for debug info, yet.  */
> -  return UI_NONE;
> +  return UI_DWARF2;
>   }
>   
>   /* Determine if there is a suitable hardware conversion instruction.
> @@ -6214,6 +6276,8 @@ gcn_dwarf_register_number (unsigned int regno)
>       return 768;  */
>     else if (regno == SCC_REG)
>       return 128;
> +  else if (regno == DWARF_LINK_REGISTER)
> +    return 16;
>     else if (SGPR_REGNO_P (regno))
>       {
>         if (regno - FIRST_SGPR_REG < 64)
> @@ -6243,8 +6307,12 @@ gcn_dwarf_register_span (rtx rtl)
>     if (GET_MODE_SIZE (mode) != 8)
>       return NULL_RTX;
>   
> -  rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
>     unsigned regno = REGNO (rtl);
> +
> +  if (regno == DWARF_LINK_REGISTER)
> +    return NULL_RTX;
> +
> +  rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
>     XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
>     XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
>   
> diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
> index eba4646f1bf..4992a4c02ef 100644
> --- a/gcc/config/gcn/gcn.h
> +++ b/gcc/config/gcn/gcn.h
> @@ -85,6 +85,7 @@
>   #define FIRST_PARM_OFFSET(FNDECL)    0
>   #define DYNAMIC_CHAIN_ADDRESS(FP)    plus_constant (Pmode, (FP), -16)
>   #define INCOMING_RETURN_ADDR_RTX     gen_rtx_REG (Pmode, LINK_REGNUM)
> +#define DWARF_FRAME_RETURN_COLUMN    16
>   #define STACK_DYNAMIC_OFFSET(FNDECL) (-crtl->outgoing_args_size)
>   #define ACCUMULATE_OUTGOING_ARGS     1
>   #define RETURN_ADDR_RTX(COUNT,FRAMEADDR) \
> @@ -135,7 +136,8 @@
>   #define WORK_ITEM_ID_Z_REG	  162
>   #define SOFT_ARG_REG		  416
>   #define FRAME_POINTER_REGNUM	  418
> -#define FIRST_PSEUDO_REGISTER	  420
> +#define DWARF_LINK_REGISTER	  420
> +#define FIRST_PSEUDO_REGISTER	  421
>   
>   #define FIRST_PARM_REG 24
>   #define NUM_PARM_REGS  6
> @@ -197,7 +199,7 @@
>       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
>       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
>       /* Other registers.  */			    \
> -    1, 1, 1, 1					    \
> +    1, 1, 1, 1, 1				    \
>   }
>   
>   #define CALL_USED_REGISTERS {			    \
> @@ -235,7 +237,7 @@
>       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
>       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
>       /* Other registers.  */			    \
> -    1, 1, 1, 1					    \
> +    1, 1, 1, 1, 1				    \
>   }
>   
>   \f
> @@ -514,7 +516,7 @@ enum gcn_address_spaces
>       "v236", "v237", "v238", "v239", "v240", "v241", "v242", "v243", "v244", \
>       "v245", "v246", "v247", "v248", "v249", "v250", "v251", "v252", "v253", \
>       "v254", "v255",							    \
> -    "?ap0", "?ap1", "?fp0", "?fp1" }
> +    "?ap0", "?ap1", "?fp0", "?fp1", "?dwlr" }
>   
>   #define PRINT_OPERAND(FILE, X, CODE)  print_operand(FILE, X, CODE)
>   #define PRINT_OPERAND_ADDRESS(FILE, ADDR)  print_operand_address (FILE, ADDR)
> 

OK.

Andrew

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] [amdgcn] Use frame pointer for CFA expressions.
  2021-06-22 17:14 ` [PATCH 2/3] [amdgcn] Use frame pointer for CFA expressions Hafiz Abid Qadeer
@ 2021-06-23  9:32   ` Andrew Stubbs
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Stubbs @ 2021-06-23  9:32 UTC (permalink / raw)
  To: Hafiz Abid Qadeer, gcc-patches

On 22/06/2021 18:14, Hafiz Abid Qadeer wrote:
> As size of address is bigger than registers in amdgcn, we are forced to use
> DW_CFA_def_cfa_expression to make an expression that concatenates multiple
> registers for the value of the CFA.  This then prohibits us from using many
> of the dwarf ops which expect CFA rule to be a single regsiter plus an offset.
> Using frame pointer in the CFA rule is only real possibility as it is saved
> in every frame and it is easy to unwind its value.
> 
> So unless user gives fomit-frame-pointer, we use frame pointer for the
> cfi information.  This options also has a different default now.
> 
> gcc/
> 
> 	* common/config/gcn/gcn-common.c
> 	(gcn_option_optimization_table): Change OPT_fomit_frame_pointer to -O3.
> 	(gcn_expand_prologue): Prefer the frame pointer when emitting CFI.
> 	(gcn_frame_pointer_rqd): New function.
> 	(TARGET_FRAME_POINTER_REQUIRED): New hook.
> ---
>   gcc/common/config/gcn/gcn-common.c |  2 +-
>   gcc/config/gcn/gcn.c               | 60 +++++++++++++++++++++++-------
>   2 files changed, 47 insertions(+), 15 deletions(-)
> 
> diff --git a/gcc/common/config/gcn/gcn-common.c b/gcc/common/config/gcn/gcn-common.c
> index 305c310f940..695eb467e34 100644
> --- a/gcc/common/config/gcn/gcn-common.c
> +++ b/gcc/common/config/gcn/gcn-common.c
> @@ -27,7 +27,7 @@
>   /* Set default optimization options.  */
>   static const struct default_options gcn_option_optimization_table[] =
>     {
> -    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
> +    { OPT_LEVELS_3_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
>       { OPT_LEVELS_NONE, 0, NULL, 0 }
>     };
>   
> diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
> index 3ab16548aad..0eac3aa3844 100644
> --- a/gcc/config/gcn/gcn.c
> +++ b/gcc/config/gcn/gcn.c
> @@ -2900,10 +2900,14 @@ gcn_expand_prologue ()
>   	  rtx adjustment = gen_int_mode (sp_adjust, SImode);
>   	  rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
>   							 adjustment, scc));
> -	  RTX_FRAME_RELATED_P (insn) = 1;
> -	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
> -			gen_rtx_SET (sp,
> -				     gen_rtx_PLUS (DImode, sp, adjustment)));
> +	  if (!offsets->need_frame_pointer)
> +	    {
> +	      RTX_FRAME_RELATED_P (insn) = 1;
> +	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
> +			    gen_rtx_SET (sp,
> +					 gen_rtx_PLUS (DImode, sp,
> +						       adjustment)));
> +	    }
>   	  emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
>   	}
>   
> @@ -2917,25 +2921,24 @@ gcn_expand_prologue ()
>   	  rtx adjustment = gen_int_mode (fp_adjust, SImode);
>   	  rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo,
>   							adjustment, scc));
> -	  RTX_FRAME_RELATED_P (insn) = 1;
> -	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
> -			gen_rtx_SET (fp,
> -				     gen_rtx_PLUS (DImode, sp, adjustment)));
>   	  emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
>   					 (fp_adjust < 0 ? GEN_INT (-1)
>   					  : const0_rtx),
>   					 scc, scc));
> +
> +	  /* Set the CFA to the entry stack address, as an offset from the
> +	     frame pointer.  This is preferred because the frame pointer is
> +	     saved in each frame, whereas the stack pointer is not.  */
> +	  RTX_FRAME_RELATED_P (insn) = 1;
> +	  add_reg_note (insn, REG_CFA_DEF_CFA,
> +			gen_rtx_PLUS (DImode, fp,
> +				      GEN_INT (-(offsets->pretend_size
> +						 + offsets->callee_saves))));
>   	}
>   
>         rtx_insn *seq = get_insns ();
>         end_sequence ();
>   
> -      /* FIXME: Prologue insns should have this flag set for debug output, etc.
> -	 but it causes issues for now.
> -      for (insn = seq; insn; insn = NEXT_INSN (insn))
> -        if (INSN_P (insn))
> -	  RTX_FRAME_RELATED_P (insn) = 1;*/
> -
>         emit_insn (seq);
>       }
>     else
> @@ -3011,6 +3014,16 @@ gcn_expand_prologue ()
>   		    gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
>   						   dbg_adjustment)));
>   
> +      if (offsets->need_frame_pointer)
> +	{
> +	  /* Set the CFA to the entry stack address, as an offset from the
> +	     frame pointer.  This is necessary when alloca is used, and
> +	     harmless otherwise.  */
> +	  rtx neg_adjust = gen_int_mode (-offsets->callee_saves, DImode);
> +	  add_reg_note (insn, REG_CFA_DEF_CFA,
> +			gen_rtx_PLUS (DImode, fp, neg_adjust));
> +	}
> +
>         /* Make sure the flat scratch reg doesn't get optimised away.  */
>         emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
>       }
> @@ -3114,6 +3127,23 @@ gcn_expand_epilogue (void)
>     emit_jump_insn (gen_gcn_return ());
>   }
>   
> +/* Implement TARGET_FRAME_POINTER_REQUIRED.
> +
> +   Return true if the frame pointer should not be eliminated.  */
> +
> +bool
> +gcn_frame_pointer_rqd (void)
> +{
> +  /* GDB needs the frame pointer in order to unwind properly,
> +     but that's not important for the entry point, unless alloca is used.
> +     It's not important for code execution, so we should repect the
> +     -fomit-frame-pointer flag.  */
> +  return (!flag_omit_frame_pointer
> +	  && cfun
> +	  && (cfun->calls_alloca
> +	      || (cfun->machine && cfun->machine->normal_function)));
> +}
> +
>   /* Implement TARGET_CAN_ELIMINATE.
>    
>      Return true if the compiler is allowed to try to replace register number
> @@ -6373,6 +6403,8 @@ gcn_dwarf_register_span (rtx rtl)
>   #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
>   #undef  TARGET_EXPAND_BUILTIN
>   #define TARGET_EXPAND_BUILTIN gcn_expand_builtin
> +#undef  TARGET_FRAME_POINTER_REQUIRED
> +#define TARGET_FRAME_POINTER_REQUIRED gcn_frame_pointer_rqd
>   #undef  TARGET_FUNCTION_ARG
>   #undef  TARGET_FUNCTION_ARG_ADVANCE
>   #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
> 

OK.

Andrew

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] [amdgcn] Add hook for DWARF address spaces.
  2021-06-22 17:14 ` [PATCH 3/3] [amdgcn] Add hook for DWARF address spaces Hafiz Abid Qadeer
@ 2021-06-23  9:32   ` Andrew Stubbs
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Stubbs @ 2021-06-23  9:32 UTC (permalink / raw)
  To: Hafiz Abid Qadeer, gcc-patches

On 22/06/2021 18:14, Hafiz Abid Qadeer wrote:
> Map GCN address spaces to the proposed DWARF address spaces defined by AMD at
> https://llvm.org/docs/AMDGPUUsage.html#amdgpu-dwarf-address-class-mapping-table
> 
> gcc/
> 
> 	* config/gcn/gcn.c: Include dwarf2.h.
> 	(gcn_addr_space_debug): New function.
> 	(TARGET_ADDR_SPACE_DEBUG): New hook.
> ---
>   gcc/config/gcn/gcn.c | 29 +++++++++++++++++++++++++++++
>   1 file changed, 29 insertions(+)
> 
> diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
> index 0eac3aa3844..25996dc83de 100644
> --- a/gcc/config/gcn/gcn.c
> +++ b/gcc/config/gcn/gcn.c
> @@ -50,6 +50,7 @@
>   #include "varasm.h"
>   #include "intl.h"
>   #include "rtl-iter.h"
> +#include "dwarf2.h"
>   
>   /* This file should be included last.  */
>   #include "target-def.h"
> @@ -1497,6 +1498,32 @@ gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
>       gcc_unreachable ();
>   }
>   
> +/* Implement TARGET_ADDR_SPACE_DEBUG.
> +
> +   Return the dwarf address space class for each hardware address space.  */
> +
> +static int
> +gcn_addr_space_debug (addr_space_t as)
> +{
> +  switch (as)
> +    {
> +      case ADDR_SPACE_DEFAULT:
> +      case ADDR_SPACE_FLAT:
> +      case ADDR_SPACE_SCALAR_FLAT:
> +      case ADDR_SPACE_FLAT_SCRATCH:
> +	return DW_ADDR_none;
> +      case ADDR_SPACE_GLOBAL:
> +	return 1;      // DW_ADDR_LLVM_global
> +      case ADDR_SPACE_LDS:
> +	return 3;      // DW_ADDR_LLVM_group
> +      case ADDR_SPACE_SCRATCH:
> +	return 4;      // DW_ADDR_LLVM_private
> +      case ADDR_SPACE_GDS:
> +	return 0x8000; // DW_ADDR_AMDGPU_region
> +    }
> +  gcc_unreachable ();
> +}
> +
>   
>   /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
>      
> @@ -6354,6 +6381,8 @@ gcn_dwarf_register_span (rtx rtl)
>   
>   #undef  TARGET_ADDR_SPACE_ADDRESS_MODE
>   #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
> +#undef  TARGET_ADDR_SPACE_DEBUG
> +#define TARGET_ADDR_SPACE_DEBUG gcn_addr_space_debug
>   #undef  TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
>   #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
>     gcn_addr_space_legitimate_address_p
> 

OK.

Andrew

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-06-23  9:32 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-22 17:14 [PATCH 0/3] [amdgcn] Improve debug support Hafiz Abid Qadeer
2021-06-22 17:14 ` [PATCH 1/3] [amdgcn] Update CFI configuration Hafiz Abid Qadeer
2021-06-23  9:32   ` Andrew Stubbs
2021-06-22 17:14 ` [PATCH 2/3] [amdgcn] Use frame pointer for CFA expressions Hafiz Abid Qadeer
2021-06-23  9:32   ` Andrew Stubbs
2021-06-22 17:14 ` [PATCH 3/3] [amdgcn] Add hook for DWARF address spaces Hafiz Abid Qadeer
2021-06-23  9:32   ` Andrew Stubbs

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).