public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v4] x86-64: Find a scratch register for large model profiling
@ 2024-02-02 15:42 H.J. Lu
  2024-02-02 16:10 ` Jakub Jelinek
  0 siblings, 1 reply; 3+ messages in thread
From: H.J. Lu @ 2024-02-02 15:42 UTC (permalink / raw)
  To: gcc-patches; +Cc: rep.dot.nop, jakub

Changes in v4:

1. Remove pr113689-2.c.
2. Use df_get_live_out.

Changes in v3:

1. Remove r10_ok.

Changes in v2:

1. Add int_parameter_registers to machine_function to track integer
registers used for parameter passing.
2. Update x86_64_select_profile_regnum to try %r10 first and use an
caller-saved register, which isn't used for parameter passing.

---
2 scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
an caller-saved register, which isn't live, at entry for large model
profiling and sorry if we can't find one.

gcc/

	PR target/113689
	* config/i386/i386.cc (x86_64_select_profile_regnum): New.
	(x86_function_profiler): Call x86_64_select_profile_regnum to
	get a scratch register for large model profiling.
	* config/i386/i386.h (machine_function): Add
	int_parameter_registers.

gcc/testsuite/

	PR target/113689
	* gcc.target/i386/pr113689-1.c: New file.
	* gcc.target/i386/pr113689-2.c: Likewise.
---
 gcc/config/i386/i386.cc                    | 81 ++++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr113689-1.c | 49 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr113689-2.c | 41 +++++++++++
 3 files changed, 156 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..b27445b8371 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22749,6 +22749,39 @@ current_fentry_section (const char **name)
   return true;
 }
 
+/* Return an caller-saved register, which isn't live, at entry for
+   profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  /* Use %r10 if it isn't used by DRAP.  */
+  if (!crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG)
+    return R10_REG;
+
+  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+  int i;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (GENERAL_REGNO_P (i)
+	&& i != R10_REG
+#ifdef NO_PROFILE_COUNTERS
+	&& (r11_ok || i != R11_REG)
+#else
+	&& i != R11_REG
+#endif
+	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
+	&& !fixed_regs[i]
+	&& call_used_regs[i]
+	&& !REGNO_REG_SET_P (reg_live, i))
+      return i;
+
+  sorry ("no register available for profiling %<-mcmodel=large%s%>",
+	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
+
+  return INVALID_REGNUM;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
    for profiling a function entry.  */
 void
@@ -22783,42 +22816,60 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
 	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
 #endif
 
+      int scratch;
+      const char *reg_prefix;
+      const char *reg;
+
       if (!TARGET_PECOFF)
 	{
 	  switch (ix86_cmodel)
 	    {
 	    case CM_LARGE:
-	      /* NB: R10 is caller-saved.  Although it can be used as a
-		 static chain register, it is preserved when calling
-		 mcount for nested functions.  */
+	      scratch = x86_64_select_profile_regnum (true);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
-		fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
-			       "\tcall\tr10\n", mcount_name);
+		fprintf (file,
+			 "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
+			 "\tcall\t%s%s\n",
+			 reg_prefix, reg, mcount_name, reg_prefix, reg);
 	      else
-		fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
-			 mcount_name);
+		fprintf (file,
+			 "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
+			 mcount_name, reg_prefix, reg, reg_prefix, reg);
 	      break;
 	    case CM_LARGE_PIC:
 #ifdef NO_PROFILE_COUNTERS
+	      scratch = x86_64_select_profile_regnum (false);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
 		{
 		  fprintf (file, "1:movabs\tr11, "
 				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
-		  fprintf (file, "\tlea\tr10, 1b[rip]\n");
-		  fprintf (file, "\tadd\tr10, r11\n");
+		  fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
 		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
 			   mcount_name);
-		  fprintf (file, "\tadd\tr10, r11\n");
-		  fprintf (file, "\tcall\tr10\n");
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tcall\t%s%s\n",
+			   reg_prefix, reg);
 		  break;
 		}
 	      fprintf (file,
 		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
-	      fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
+	      fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
 	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
-	      fprintf (file, "\tcall\t*%%r10\n");
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\tcall\t*%%%s%s\n",
+		       reg_prefix, reg);
 #else
 	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
new file mode 100644
index 00000000000..c32445e0fc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
@@ -0,0 +1,49 @@
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
+
+#include <stdarg.h>
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, int a3, int a4, int a5, int a6,
+     char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  if (a3 != 3)
+    __builtin_abort ();
+  if (a4 != 4)
+    __builtin_abort ();
+  if (a5 != 5)
+    __builtin_abort ();
+  if (a6 != 6)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z, ...)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  va_list ap;
+  va_start (ap, z);
+  int x = va_arg (ap, int);
+  if (x != 38)
+    __builtin_abort ();
+  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
+  va_end (ap);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16, 38);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-2.c b/gcc/testsuite/gcc.target/i386/pr113689-2.c
new file mode 100644
index 00000000000..fec5b171a39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fpic -fprofile -mcmodel=large" } */
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, int a3, int a4, int a5, int a6,
+     char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  if (a3 != 3)
+    __builtin_abort ();
+  if (a4 != 4)
+    __builtin_abort ();
+  if (a5 != 5)
+    __builtin_abort ();
+  if (a6 != 6)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16);
+  return 0;
+}
-- 
2.43.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v4] x86-64: Find a scratch register for large model profiling
  2024-02-02 15:42 [PATCH v4] x86-64: Find a scratch register for large model profiling H.J. Lu
@ 2024-02-02 16:10 ` Jakub Jelinek
  2024-02-02 19:19   ` H.J. Lu
  0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2024-02-02 16:10 UTC (permalink / raw)
  To: H.J. Lu, Uros Bizjak; +Cc: gcc-patches, rep.dot.nop

On Fri, Feb 02, 2024 at 07:42:00AM -0800, H.J. Lu wrote:
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -22749,6 +22749,39 @@ current_fentry_section (const char **name)
>    return true;
>  }
>  
> +/* Return an caller-saved register, which isn't live, at entry for
> +   profile.  */
> +
> +static int
> +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> +{
> +  /* Use %r10 if it isn't used by DRAP.  */
> +  if (!crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG)

I'd really like to see flag_fentry != 0 || here, if the profiler is
emitted before the prologue (so before initializing the drap register),
%r10 is a fine choice.

> +    return R10_REG;
> +
> +  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

I meant at pro_and_epilogue time, but perhaps doing it here
can discover arguments of the function which are used somewhere in
the body too.

> +  int i;
> +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> +    if (GENERAL_REGNO_P (i)
> +	&& i != R10_REG
> +#ifdef NO_PROFILE_COUNTERS
> +	&& (r11_ok || i != R11_REG)
> +#else
> +	&& i != R11_REG
> +#endif
> +	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> +	&& !fixed_regs[i]
> +	&& call_used_regs[i]

I wonder if this shouldn't be && (call_used_regs[i] || X)
where X would cover registers known to be saved in the prologue
which aren't live from the prologue to the body (stuff like hard frame
pointer if used).
Because if the prologue say saves %r12 or %rbx to stack but doesn't
yet set it to something, why couldn't the profiler use it?
I'd expect cfun->machine contains something what has been saved there.

> +	&& !REGNO_REG_SET_P (reg_live, i))
> +      return i;
> +
> +  sorry ("no register available for profiling %<-mcmodel=large%s%>",
> +	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> +
> +  return INVALID_REGNUM;
> +}
> +
>  /* Output assembler code to FILE to increment profiler label # LABELNO
>     for profiling a function entry.  */
>  void
> @@ -22783,42 +22816,60 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
>  	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
>  #endif
>  
> +      int scratch;
> +      const char *reg_prefix;
> +      const char *reg;
> +
>        if (!TARGET_PECOFF)
>  	{
>  	  switch (ix86_cmodel)
>  	    {
>  	    case CM_LARGE:
> -	      /* NB: R10 is caller-saved.  Although it can be used as a
> -		 static chain register, it is preserved when calling
> -		 mcount for nested functions.  */
> +	      scratch = x86_64_select_profile_regnum (true);
> +	      reg = hi_reg_name[scratch];
> +	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
>  	      if (ASSEMBLER_DIALECT == ASM_INTEL)
> -		fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
> -			       "\tcall\tr10\n", mcount_name);
> +		fprintf (file,
> +			 "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
> +			 "\tcall\t%s%s\n",
> +			 reg_prefix, reg, mcount_name, reg_prefix, reg);
>  	      else
> -		fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
> -			 mcount_name);
> +		fprintf (file,
> +			 "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
> +			 mcount_name, reg_prefix, reg, reg_prefix, reg);
>  	      break;
>  	    case CM_LARGE_PIC:
>  #ifdef NO_PROFILE_COUNTERS
> +	      scratch = x86_64_select_profile_regnum (false);
> +	      reg = hi_reg_name[scratch];
> +	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
>  	      if (ASSEMBLER_DIALECT == ASM_INTEL)
>  		{
>  		  fprintf (file, "1:movabs\tr11, "
>  				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
> -		  fprintf (file, "\tlea\tr10, 1b[rip]\n");
> -		  fprintf (file, "\tadd\tr10, r11\n");
> +		  fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
> +			   reg_prefix, reg);
> +		  fprintf (file, "\tadd\t%s%s, r11\n",
> +			   reg_prefix, reg);
>  		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
>  			   mcount_name);
> -		  fprintf (file, "\tadd\tr10, r11\n");
> -		  fprintf (file, "\tcall\tr10\n");
> +		  fprintf (file, "\tadd\t%s%s, r11\n",
> +			   reg_prefix, reg);
> +		  fprintf (file, "\tcall\t%s%s\n",
> +			   reg_prefix, reg);
>  		  break;
>  		}
>  	      fprintf (file,
>  		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
> -	      fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
> -	      fprintf (file, "\taddq\t%%r11, %%r10\n");
> +	      fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
> +		       reg_prefix, reg);
> +	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> +		       reg_prefix, reg);
>  	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
> -	      fprintf (file, "\taddq\t%%r11, %%r10\n");
> -	      fprintf (file, "\tcall\t*%%r10\n");
> +	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> +		       reg_prefix, reg);
> +	      fprintf (file, "\tcall\t*%%%s%s\n",
> +		       reg_prefix, reg);
>  #else
>  	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
>  #endif
> diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> new file mode 100644
> index 00000000000..c32445e0fc4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> @@ -0,0 +1,49 @@
> +/* { dg-do run { target { lp64 && fpic } } } */
> +/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
> +
> +#include <stdarg.h>
> +
> +__attribute__((noipa,noclone,noinline))

Just noipa instead of noipa,noclone,noinline, the former implies
noclone,noinline too.  In all tests.

And, eventually you want Uros to review this too.

	Jakub


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v4] x86-64: Find a scratch register for large model profiling
  2024-02-02 16:10 ` Jakub Jelinek
@ 2024-02-02 19:19   ` H.J. Lu
  0 siblings, 0 replies; 3+ messages in thread
From: H.J. Lu @ 2024-02-02 19:19 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Uros Bizjak, gcc-patches, rep.dot.nop

On Fri, Feb 02, 2024 at 05:10:05PM +0100, Jakub Jelinek wrote:
> On Fri, Feb 02, 2024 at 07:42:00AM -0800, H.J. Lu wrote:
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -22749,6 +22749,39 @@ current_fentry_section (const char **name)
> >    return true;
> >  }
> >  
> > +/* Return an caller-saved register, which isn't live, at entry for
> > +   profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  /* Use %r10 if it isn't used by DRAP.  */
> > +  if (!crtl->drap_reg || REGNO (crtl->drap_reg) != R10_REG)
> 
> I'd really like to see flag_fentry != 0 || here, if the profiler is
> emitted before the prologue (so before initializing the drap register),
> %r10 is a fine choice.

Fixed in v5.

> > +    return R10_REG;
> > +
> > +  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
> 
> I meant at pro_and_epilogue time, but perhaps doing it here
> can discover arguments of the function which are used somewhere in
> the body too.

My patch works when an argument is unused in the function body.  The
the unused argument register will be used for profiler.  I will add
a testcase in v5 to verify it.

> 
> > +  int i;
> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +    if (GENERAL_REGNO_P (i)
> > +	&& i != R10_REG
> > +#ifdef NO_PROFILE_COUNTERS
> > +	&& (r11_ok || i != R11_REG)
> > +#else
> > +	&& i != R11_REG
> > +#endif
> > +	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> > +	&& !fixed_regs[i]
> > +	&& call_used_regs[i]
> 
> I wonder if this shouldn't be && (call_used_regs[i] || X)
> where X would cover registers known to be saved in the prologue
> which aren't live from the prologue to the body (stuff like hard frame
> pointer if used).
> Because if the prologue say saves %r12 or %rbx to stack but doesn't
> yet set it to something, why couldn't the profiler use it?
> I'd expect cfun->machine contains something what has been saved there.

Added

 /* Bit mask for integer registers saved on stack in prologue.  The
     lower 8 bits are for legacy registers and the upper 8 bits are
     for r8-r15.  */
  unsigned int saved_int_registers : 16;

to track them.

> 
> > +	&& !REGNO_REG_SET_P (reg_live, i))
> > +      return i;
> > +
> > +  sorry ("no register available for profiling %<-mcmodel=large%s%>",
> > +	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> > +
> > +  return INVALID_REGNUM;
> > +}
> > +
> >  /* Output assembler code to FILE to increment profiler label # LABELNO
> >     for profiling a function entry.  */
> >  void
> > @@ -22783,42 +22816,60 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
> >  	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
> >  #endif
> >  
> > +      int scratch;
> > +      const char *reg_prefix;
> > +      const char *reg;
> > +
> >        if (!TARGET_PECOFF)
> >  	{
> >  	  switch (ix86_cmodel)
> >  	    {
> >  	    case CM_LARGE:
> > -	      /* NB: R10 is caller-saved.  Although it can be used as a
> > -		 static chain register, it is preserved when calling
> > -		 mcount for nested functions.  */
> > +	      scratch = x86_64_select_profile_regnum (true);
> > +	      reg = hi_reg_name[scratch];
> > +	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
> >  	      if (ASSEMBLER_DIALECT == ASM_INTEL)
> > -		fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
> > -			       "\tcall\tr10\n", mcount_name);
> > +		fprintf (file,
> > +			 "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
> > +			 "\tcall\t%s%s\n",
> > +			 reg_prefix, reg, mcount_name, reg_prefix, reg);
> >  	      else
> > -		fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
> > -			 mcount_name);
> > +		fprintf (file,
> > +			 "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
> > +			 mcount_name, reg_prefix, reg, reg_prefix, reg);
> >  	      break;
> >  	    case CM_LARGE_PIC:
> >  #ifdef NO_PROFILE_COUNTERS
> > +	      scratch = x86_64_select_profile_regnum (false);
> > +	      reg = hi_reg_name[scratch];
> > +	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
> >  	      if (ASSEMBLER_DIALECT == ASM_INTEL)
> >  		{
> >  		  fprintf (file, "1:movabs\tr11, "
> >  				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
> > -		  fprintf (file, "\tlea\tr10, 1b[rip]\n");
> > -		  fprintf (file, "\tadd\tr10, r11\n");
> > +		  fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
> > +			   reg_prefix, reg);
> > +		  fprintf (file, "\tadd\t%s%s, r11\n",
> > +			   reg_prefix, reg);
> >  		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
> >  			   mcount_name);
> > -		  fprintf (file, "\tadd\tr10, r11\n");
> > -		  fprintf (file, "\tcall\tr10\n");
> > +		  fprintf (file, "\tadd\t%s%s, r11\n",
> > +			   reg_prefix, reg);
> > +		  fprintf (file, "\tcall\t%s%s\n",
> > +			   reg_prefix, reg);
> >  		  break;
> >  		}
> >  	      fprintf (file,
> >  		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
> > -	      fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
> > -	      fprintf (file, "\taddq\t%%r11, %%r10\n");
> > +	      fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
> > +		       reg_prefix, reg);
> > +	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> > +		       reg_prefix, reg);
> >  	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
> > -	      fprintf (file, "\taddq\t%%r11, %%r10\n");
> > -	      fprintf (file, "\tcall\t*%%r10\n");
> > +	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
> > +		       reg_prefix, reg);
> > +	      fprintf (file, "\tcall\t*%%%s%s\n",
> > +		       reg_prefix, reg);
> >  #else
> >  	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
> >  #endif
> > diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> > new file mode 100644
> > index 00000000000..c32445e0fc4
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
> > @@ -0,0 +1,49 @@
> > +/* { dg-do run { target { lp64 && fpic } } } */
> > +/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
> > +
> > +#include <stdarg.h>
> > +
> > +__attribute__((noipa,noclone,noinline))
> 
> Just noipa instead of noipa,noclone,noinline, the former implies
> noclone,noinline too.  In all tests.

Fixed.

> 
> And, eventually you want Uros to review this too.

Will send v5 after testing.

Thanks.

H.J.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-02-02 19:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-02 15:42 [PATCH v4] x86-64: Find a scratch register for large model profiling H.J. Lu
2024-02-02 16:10 ` Jakub Jelinek
2024-02-02 19:19   ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).