public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86-64: Find a scratch register for large model profiling
@ 2024-02-01 18:15 H.J. Lu
  2024-02-01 18:31 ` Jakub Jelinek
  0 siblings, 1 reply; 5+ messages in thread
From: H.J. Lu @ 2024-02-01 18:15 UTC (permalink / raw)
  To: gcc-patches

2 scratch registers, %r10 and %r11, are available at function entry for
large model profiling.  But %r10 may be used by stack realignment and we
can't use %r10 in this case.  Add x86_64_select_profile_regnum to find
a scratch register for large model profiling and sorry if we can't find
one.

gcc/

	PR target/113689
	* config/i386/i386.cc (x86_64_select_profile_regnum): New.
	(x86_function_profiler): Call x86_64_select_profile_regnum to
	get a scratch register for large model profiling.

gcc/testsuite/

	PR target/113689
	* gcc.target/i386/pr113689-1.c: New file.
	* gcc.target/i386/pr113689-2.c: Likewise.
	* gcc.target/i386/pr113689-3.c: Likewise.
	* gcc.target/i386/pr98482-1.c: Updated.
	* gcc.target/i386/pr98482-2.c: Likewise.
---
 gcc/config/i386/i386.cc                    | 73 +++++++++++++++++-----
 gcc/testsuite/gcc.target/i386/pr113689-1.c | 41 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr113689-2.c | 32 ++++++++++
 gcc/testsuite/gcc.target/i386/pr113689-3.c | 24 +++++++
 gcc/testsuite/gcc.target/i386/pr98482-1.c  |  4 +-
 gcc/testsuite/gcc.target/i386/pr98482-2.c  |  2 +-
 6 files changed, 158 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113689-3.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b3e7c74846e..04f88a7162b 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
   return true;
 }
 
+/* Return an unused caller-saved register at entry for profile.  */
+
+static int
+x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
+{
+  int i;
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    if (GENERAL_REGNO_P (i)
+#ifdef NO_PROFILE_COUNTERS
+	&& (r11_ok || i != R11_REG)
+#else
+	&& i != R11_REG
+#endif
+	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
+	&& !fixed_regs[i]
+	&& call_used_regs[i]
+	&& !df_regs_ever_live_p (i))
+      return i;
+
+  sorry ("No register available for profiling %<-mcmodel=large%s%>",
+	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
+
+  return INVALID_REGNUM;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
    for profiling a function entry.  */
 void
@@ -22783,42 +22808,60 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
 	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
 #endif
 
+      int scratch;
+      const char *reg_prefix;
+      const char *reg;
+
       if (!TARGET_PECOFF)
 	{
 	  switch (ix86_cmodel)
 	    {
 	    case CM_LARGE:
-	      /* NB: R10 is caller-saved.  Although it can be used as a
-		 static chain register, it is preserved when calling
-		 mcount for nested functions.  */
+	      scratch = x86_64_select_profile_regnum (true);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
-		fprintf (file, "1:\tmovabs\tr10, OFFSET FLAT:%s\n"
-			       "\tcall\tr10\n", mcount_name);
+		fprintf (file,
+			 "1:\tmovabs\t%s%s, OFFSET FLAT:%s\n"
+			 "\tcall\t%s%s\n",
+			 reg_prefix, reg, mcount_name, reg_prefix, reg);
 	      else
-		fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
-			 mcount_name);
+		fprintf (file,
+			 "1:\tmovabsq\t$%s, %%%s%s\n\tcall\t*%%%s%s\n",
+			 mcount_name, reg_prefix, reg, reg_prefix, reg);
 	      break;
 	    case CM_LARGE_PIC:
 #ifdef NO_PROFILE_COUNTERS
+	      scratch = x86_64_select_profile_regnum (false);
+	      reg = hi_reg_name[scratch];
+	      reg_prefix = LEGACY_INT_REGNO_P (scratch) ? "r" : "";
 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
 		{
 		  fprintf (file, "1:movabs\tr11, "
 				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
-		  fprintf (file, "\tlea\tr10, 1b[rip]\n");
-		  fprintf (file, "\tadd\tr10, r11\n");
+		  fprintf (file, "\tlea\t%s%s, 1b[rip]\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
 		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
 			   mcount_name);
-		  fprintf (file, "\tadd\tr10, r11\n");
-		  fprintf (file, "\tcall\tr10\n");
+		  fprintf (file, "\tadd\t%s%s, r11\n",
+			   reg_prefix, reg);
+		  fprintf (file, "\tcall\t%s%s\n",
+			   reg_prefix, reg);
 		  break;
 		}
 	      fprintf (file,
 		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
-	      fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
+	      fprintf (file, "\tleaq\t1b(%%rip), %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
 	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
-	      fprintf (file, "\taddq\t%%r11, %%r10\n");
-	      fprintf (file, "\tcall\t*%%r10\n");
+	      fprintf (file, "\taddq\t%%r11, %%%s%s\n",
+		       reg_prefix, reg);
+	      fprintf (file, "\tcall\t*%%%s%s\n",
+		       reg_prefix, reg);
 #else
 	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-1.c b/gcc/testsuite/gcc.target/i386/pr113689-1.c
new file mode 100644
index 00000000000..97c40d051db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-1.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fno-pic -fprofile -mcmodel=large" } */
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, int a3, int a4, int a5, int a6,
+     char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  if (a3 != 3)
+    __builtin_abort ();
+  if (a4 != 4)
+    __builtin_abort ();
+  if (a5 != 5)
+    __builtin_abort ();
+  if (a6 != 6)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  bar (c, d, e, f, g, h, &b[0], __builtin_alloca (z), &z);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-2.c b/gcc/testsuite/gcc.target/i386/pr113689-2.c
new file mode 100644
index 00000000000..b0663b9bfe6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-2.c
@@ -0,0 +1,32 @@
+/* { dg-do run { target { lp64 && fpic } } } */
+/* { dg-options "-O2 -fpic -fprofile -mcmodel=large" } */
+
+__attribute__((noipa,noclone,noinline))
+void
+bar (int a1, int a2, char *x, char *y, int *z)
+{
+  if (a1 != 1)
+    __builtin_abort ();
+  if (a2 != 2)
+    __builtin_abort ();
+  x[0] = 42;
+  y[0] = 42;
+  if (z[0] != 16)
+    __builtin_abort ();
+}
+
+__attribute__((noipa,noclone,noinline))
+void 
+foo (int c, int d, int e, int f, int g, int h, int z)
+{
+  typedef char B[32];
+  B b __attribute__((aligned (32)));
+  bar (c, d, &b[0], __builtin_alloca (z), &z);
+}
+
+int
+main ()
+{
+  foo (1, 2, 3, 4, 5, 6, 16);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113689-3.c b/gcc/testsuite/gcc.target/i386/pr113689-3.c
new file mode 100644
index 00000000000..10099fc8d96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113689-3.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { lp64 && avx_runtime } } } */
+/* { dg-options "-O2 -fprofile -mcmodel=large -mavx" } */
+
+__attribute__((noipa,noclone,noinline))
+_BitInt(511)
+foo(_BitInt(7) a, _BitInt(511) b)
+{
+  ({
+    volatile _BitInt(511) d = (-b / a);
+    if (d != -1)
+      __builtin_abort();
+    d;
+  });
+  return b;
+}
+
+int
+main(void)
+{
+  _BitInt(511) x = foo(5, 5);
+  if (x != 5)
+    __builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr98482-1.c b/gcc/testsuite/gcc.target/i386/pr98482-1.c
index b1d9619275e..ccf068eef5b 100644
--- a/gcc/testsuite/gcc.target/i386/pr98482-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr98482-1.c
@@ -1,8 +1,8 @@
 /* { dg-do compile { target { *-*-linux* && lp64 } } } */
 /* { dg-require-effective-target mfentry } */
 /* { dg-options "-fprofile -mfentry -O2 -mcmodel=large" } */
-/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__, %r10\n\tcall\t\\*%r10" { target nonpic } } } */
-/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %r10\n\tcall\t\\*%r10" { target { ! nonpic } } } } */
+/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__, %rax\n\tcall\t\\*%rax" { target nonpic } } } */
+/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %rdx\n\tcall\t\\*%rax" { target { ! nonpic } } } } */
 
 void
 func (void)
diff --git a/gcc/testsuite/gcc.target/i386/pr98482-2.c b/gcc/testsuite/gcc.target/i386/pr98482-2.c
index 03c62a4b67b..7220445ce7f 100644
--- a/gcc/testsuite/gcc.target/i386/pr98482-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr98482-2.c
@@ -2,7 +2,7 @@
 /* { dg-require-effective-target mfentry } */
 /* { dg-require-effective-target fpic } */
 /* { dg-options "-fpic -fprofile -mfentry -O2 -mcmodel=large" } */
-/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %r10\n\tcall\t\\*%r10" } } */
+/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__@PLTOFF, %r11\n\taddq\t%r11, %rdx\n\tcall\t\\*%rdx" } } */
 
 void
 func (void)
-- 
2.43.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86-64: Find a scratch register for large model profiling
  2024-02-01 18:15 [PATCH] x86-64: Find a scratch register for large model profiling H.J. Lu
@ 2024-02-01 18:31 ` Jakub Jelinek
  2024-02-01 23:02   ` H.J. Lu
  0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2024-02-01 18:31 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
>    return true;
>  }
>  
> +/* Return an unused caller-saved register at entry for profile.  */
> +
> +static int
> +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> +{
> +  int i;

Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
behavior unless emitting profiler after prologue)?

> +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> +    if (GENERAL_REGNO_P (i)
> +#ifdef NO_PROFILE_COUNTERS
> +	&& (r11_ok || i != R11_REG)
> +#else
> +	&& i != R11_REG
> +#endif
> +	&& (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> +	&& !fixed_regs[i]
> +	&& call_used_regs[i]
> +	&& !df_regs_ever_live_p (i))

Also, isn't this too restrictive?
I mean, all we care about is whether there is some register
which is not live across the NOTE_INSN_PROLOG_END note, no?
I.e. doesn't contain any of function's argument that are used later,
and isn't set in the prologue to be used later.  E.g. call used
register which is just saved to stack in the prologue might be just fine.

> +      return i;
> +
> +  sorry ("No register available for profiling %<-mcmodel=large%s%>",

Diagnostics shouldn't start with capital letter.

> +	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> +
> +  return INVALID_REGNUM;
> +}
> +
>  /* Output assembler code to FILE to increment profiler label # LABELNO
>     for profiling a function entry.  */
>  void

	Jakub


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86-64: Find a scratch register for large model profiling
  2024-02-01 18:31 ` Jakub Jelinek
@ 2024-02-01 23:02   ` H.J. Lu
  2024-02-02 12:07     ` rep.dot.nop
  0 siblings, 1 reply; 5+ messages in thread
From: H.J. Lu @ 2024-02-01 23:02 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
> >    return true;
> >  }
> >
> > +/* Return an unused caller-saved register at entry for profile.  */
> > +
> > +static int
> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> > +{
> > +  int i;
>
> Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
> behavior unless emitting profiler after prologue)?

Fixed in v2.

> > +  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> > +    if (GENERAL_REGNO_P (i)
> > +#ifdef NO_PROFILE_COUNTERS
> > +     && (r11_ok || i != R11_REG)
> > +#else
> > +     && i != R11_REG
> > +#endif
> > +     && (!REX2_INT_REGNO_P (i) || TARGET_APX_EGPR)
> > +     && !fixed_regs[i]
> > +     && call_used_regs[i]
> > +     && !df_regs_ever_live_p (i))
>
> Also, isn't this too restrictive?
> I mean, all we care about is whether there is some register
> which is not live across the NOTE_INSN_PROLOG_END note, no?
> I.e. doesn't contain any of function's argument that are used later,
> and isn't set in the prologue to be used later.  E.g. call used
> register which is just saved to stack in the prologue might be just fine.

Fixed in v2.

> > +      return i;
> > +
> > +  sorry ("No register available for profiling %<-mcmodel=large%s%>",
>
> Diagnostics shouldn't start with capital letter.
>
> > +      ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
> > +
> > +  return INVALID_REGNUM;
> > +}
> > +
> >  /* Output assembler code to FILE to increment profiler label # LABELNO
> >     for profiling a function entry.  */
> >  void
>
>         Jakub
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86-64: Find a scratch register for large model profiling
  2024-02-01 23:02   ` H.J. Lu
@ 2024-02-02 12:07     ` rep.dot.nop
  2024-02-02 12:22       ` H.J. Lu
  0 siblings, 1 reply; 5+ messages in thread
From: rep.dot.nop @ 2024-02-02 12:07 UTC (permalink / raw)
  To: gcc-patches, H.J. Lu, Jakub Jelinek

On 2 February 2024 00:02:54 CET, "H.J. Lu" <hjl.tools@gmail.com> wrote:
>On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek <jakub@redhat.com> wrote:
>>
>> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
>> > --- a/gcc/config/i386/i386.cc
>> > +++ b/gcc/config/i386/i386.cc
>> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
>> >    return true;
>> >  }
>> >
>> > +/* Return an unused caller-saved register at entry for profile.  */
>> > +
>> > +static int
>> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
>> > +{
>> > +  int i;
>>
>> Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
>> behavior unless emitting profiler after prologue)?
>
>Fixed in v2.

Nit: r10_ok is now superfluous, but lets wait for Jakub.
thanks

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] x86-64: Find a scratch register for large model profiling
  2024-02-02 12:07     ` rep.dot.nop
@ 2024-02-02 12:22       ` H.J. Lu
  0 siblings, 0 replies; 5+ messages in thread
From: H.J. Lu @ 2024-02-02 12:22 UTC (permalink / raw)
  To: rep.dot.nop; +Cc: gcc-patches, Jakub Jelinek

On Fri, Feb 2, 2024 at 4:07 AM <rep.dot.nop@gmail.com> wrote:
>
> On 2 February 2024 00:02:54 CET, "H.J. Lu" <hjl.tools@gmail.com> wrote:
> >On Thu, Feb 1, 2024 at 10:32 AM Jakub Jelinek <jakub@redhat.com> wrote:
> >>
> >> On Thu, Feb 01, 2024 at 10:15:30AM -0800, H.J. Lu wrote:
> >> > --- a/gcc/config/i386/i386.cc
> >> > +++ b/gcc/config/i386/i386.cc
> >> > @@ -22749,6 +22749,31 @@ current_fentry_section (const char **name)
> >> >    return true;
> >> >  }
> >> >
> >> > +/* Return an unused caller-saved register at entry for profile.  */
> >> > +
> >> > +static int
> >> > +x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
> >> > +{
> >> > +  int i;
> >>
> >> Why not just return R10_REG here if flag_entry != 0 (i.e. keep existing
> >> behavior unless emitting profiler after prologue)?
> >
> >Fixed in v2.
>
> Nit: r10_ok is now superfluous, but lets wait for Jakub.
> thanks

Fixed in v3.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-02-02 12:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-01 18:15 [PATCH] x86-64: Find a scratch register for large model profiling H.J. Lu
2024-02-01 18:31 ` Jakub Jelinek
2024-02-01 23:02   ` H.J. Lu
2024-02-02 12:07     ` rep.dot.nop
2024-02-02 12:22       ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).