public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Nathan Froyd <froydnj@codesourcery.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>, Jan Hubicka <jh@suse.cz>
Subject: Re: [PATCH,i386] fix PR 11001
Date: Fri, 12 Oct 2007 16:13:00 -0000	[thread overview]
Message-ID: <20071012161246.GE11809@codesourcery.com> (raw)
In-Reply-To: <5787cf470710042259x3428b010y7d8bb8ac189157a0@mail.gmail.com>

On Fri, Oct 05, 2007 at 07:59:04AM +0200, Uros Bizjak wrote:
> Sorry for not being clear here. TARGET_INLINE_ALL_STRINGOPS* should be
> respected, and libcalls should not be generated in this case. For
> example, we use -minline-all-stringops to build crtfastmath.o. For
> some targets, we don't  want to link against libc to use memset
> routine.
> 
> The patch is OK  with above change, but please wait a day or two for
> possilbe comments from Jan, as he is the original author of stringops
> code.

Thanks, the below is what I committed.

-Nathan

gcc/
	PR 11001
	* config/i386/i386.md (strmov): Check for esi and edi usage.
	* config/i386/i386.c (decide_alg): Check whether we can use a
	rep prefix and adjust algorithm choice accordingly.
	(ix86_expand_strlen): Check for eax, ecx, and edi usage.

gcc/testsuite/
	PR 11001
	* gcc.target/i386/pr11001-strlen-1.c: New testcase.
	* gcc.target/i386/pr11001-strlen-2.c: New testcase.
	* gcc.target/i386/pr11001-strlen-3.c: New testcase.
	* gcc.target/i386/pr11001-memset-1.c: New testcase.
	* gcc.target/i386/pr11001-memset-2.c: New testcase.
	* gcc.target/i386/pr11001-memset-3.c: New testcase.
	* gcc.target/i386/pr11001-memcpy-1.c: New testcase.
	* gcc.target/i386/pr11001-memcpy-2.c: New testcase.
	* gcc.target/i386/pr11001-memcpy-3.c: New testcase.

Index: gcc/testsuite/gcc.target/i386/pr11001-strlen-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-strlen-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-strlen-2.c	(revision 0)
@@ -0,0 +1,16 @@
+/* Ensure that we don't use 'repnz scasb' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-O1 -m32" } */
+
+extern __SIZE_TYPE__ strlen (const char *);
+extern void *malloc (__SIZE_TYPE__);
+
+register int regvar asm("%eax"); /* { dg-warning "call-clobbered register" } */
+
+char *
+do_copy (char *str)
+{
+  return malloc (strlen (str) + 1);
+}
+
+/* { dg-final { scan-assembler-not "repnz scasb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-memset-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-memset-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-memset-2.c	(revision 0)
@@ -0,0 +1,23 @@
+/* Ensure that we don't use 'rep stoX' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+  memset (foo, 0, sizeof foo);
+  memset (baz, 0, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep stosl" } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-strlen-3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-strlen-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-strlen-3.c	(revision 0)
@@ -0,0 +1,16 @@
+/* Ensure that we don't use 'repnz scasb' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-O1 -m32" } */
+
+extern __SIZE_TYPE__ strlen (const char *);
+extern void *malloc (__SIZE_TYPE__);
+
+register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
+
+char *
+do_copy (char *str)
+{
+  return malloc (strlen (str) + 1);
+}
+
+/* { dg-final { scan-assembler-not "repnz scasb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-memset-3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-memset-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-memset-3.c	(revision 0)
@@ -0,0 +1,23 @@
+/* Ensure that we don't use 'rep stoX' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+register int regvar asm("%edi");
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+  memset (foo, 0, sizeof foo);
+  memset (baz, 0, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep stosl" } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-memcpy-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-memcpy-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-memcpy-1.c	(revision 0)
@@ -0,0 +1,23 @@
+/* Ensure that we don't use 'rep movX' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+register int regvar asm("%esi");
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+  memcpy (foo, bar, sizeof foo);
+  memcpy (baz, quux, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep movsl" } } */
+/* { dg-final { scan-assembler-not "rep movsb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-memcpy-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-memcpy-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-memcpy-2.c	(revision 0)
@@ -0,0 +1,23 @@
+/* Ensure that we don't use 'rep movX' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+register int regvar asm("%edi");
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+  memcpy (foo, bar, sizeof foo);
+  memcpy (baz, quux, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep movsl" } } */
+/* { dg-final { scan-assembler-not "rep movsb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-memcpy-3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-memcpy-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-memcpy-3.c	(revision 0)
@@ -0,0 +1,23 @@
+/* Ensure that we don't use 'rep movX' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+  memcpy (foo, bar, sizeof foo);
+  memcpy (baz, quux, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep movsl" } } */
+/* { dg-final { scan-assembler-not "rep movsb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-strlen-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-strlen-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-strlen-1.c	(revision 0)
@@ -0,0 +1,16 @@
+/* Ensure that we don't use 'repnz scasb' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-O1 -m32" } */
+
+extern __SIZE_TYPE__ strlen (const char *);
+extern void *malloc (__SIZE_TYPE__);
+
+register int regvar asm("%edi");
+
+char *
+do_copy (char *str)
+{
+  return malloc (strlen (str) + 1);
+}
+
+/* { dg-final { scan-assembler-not "repnz scasb" } } */
Index: gcc/testsuite/gcc.target/i386/pr11001-memset-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr11001-memset-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr11001-memset-1.c	(revision 0)
@@ -0,0 +1,23 @@
+/* Ensure that we don't use 'rep stoX' in the presence of register globals.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+register int regvar asm("%eax"); /* { dg-warning "call-clobbered register" } */
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+  memset (foo, 0, sizeof foo);
+  memset (baz, 0, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep stosl" } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md	(revision 129263)
+++ gcc/config/i386/i386.md	(working copy)
@@ -18702,7 +18702,9 @@ (define_expand "strmov"
   operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
   operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
 
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
+  /* Can't use this if the user has appropriated esi or edi.  */
+  if ((TARGET_SINGLE_STRINGOP || optimize_size)
+      && !(global_regs[4] || global_regs[5]))
     {
       emit_insn (gen_strmov_singleop (operands[0], operands[1],
 				      operands[2], operands[3],
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 129263)
+++ gcc/config/i386/i386.c	(working copy)
@@ -15056,21 +15056,32 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
 	    int *dynamic_check)
 {
   const struct stringop_algs * algs;
+  /* Algorithms using the rep prefix want at least edi and ecx;
+     additionally, memset wants eax and memcpy wants esi.  Don't
+     consider such algorithms if the user has appropriated those
+     registers for their own purposes.	*/
+  bool rep_prefix_usable = !(global_regs[2] || global_regs[5]
+                             || (memset ? global_regs[0] : global_regs[4]));
+
+#define ALG_USABLE_P(alg) (rep_prefix_usable			\
+			   || (alg != rep_prefix_1_byte		\
+			       && alg != rep_prefix_4_byte      \
+			       && alg != rep_prefix_8_byte))
 
   *dynamic_check = -1;
   if (memset)
     algs = &ix86_cost->memset[TARGET_64BIT != 0];
   else
     algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
-  if (stringop_alg != no_stringop)
+  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
     return stringop_alg;
   /* rep; movq or rep; movl is the smallest variant.  */
   else if (optimize_size)
     {
       if (!count || (count & 3))
-	return rep_prefix_1_byte;
+	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
       else
-	return rep_prefix_4_byte;
+	return rep_prefix_usable ? rep_prefix_4_byte : loop;
     }
   /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
    */
@@ -15082,27 +15093,34 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
       enum stringop_alg alg = libcall;
       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
 	{
-	  gcc_assert (algs->size[i].max);
+	  /* We get here if the algorithms that were not libcall-based
+	     were rep-prefix based and we are unable to use rep prefixes
+	     based on global register usage.  Break out of the loop and
+	     use the heuristic below.  */
+	  if (algs->size[i].max == 0)
+	    break;
 	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
 	    {
-	      if (algs->size[i].alg != libcall)
-		alg = algs->size[i].alg;
+	      enum stringop_alg candidate = algs->size[i].alg;
+
+	      if (candidate != libcall && ALG_USABLE_P (candidate))
+		alg = candidate;
 	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
-	         last non-libcall inline algorithm.  */
+		 last non-libcall inline algorithm.  */
 	      if (TARGET_INLINE_ALL_STRINGOPS)
 		{
 		  /* When the current size is best to be copied by a libcall,
-		     but we are still forced to inline, run the heuristic bellow
+		     but we are still forced to inline, run the heuristic below
 		     that will pick code for medium sized blocks.  */
 		  if (alg != libcall)
 		    return alg;
 		  break;
 		}
-	      else
-		return algs->size[i].alg;
+	      else if (ALG_USABLE_P (candidate))
+		return candidate;
 	    }
 	}
-      gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
+      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
     }
   /* When asked to inline the call anyway, try to pick meaningful choice.
      We look for maximal size of block that is faster to copy by hand and
@@ -15112,15 +15130,32 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
      If this turns out to be bad, we might simply specify the preferred
      choice in ix86_costs.  */
   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
-      && algs->unknown_size == libcall)
+      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
     {
       int max = -1;
       enum stringop_alg alg;
       int i;
+      bool any_alg_usable_p = true;
 
       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
-	if (algs->size[i].alg != libcall && algs->size[i].alg)
-	  max = algs->size[i].max;
+        {
+          enum stringop_alg candidate = algs->size[i].alg;
+          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
+
+          if (candidate != libcall && candidate
+              && ALG_USABLE_P (candidate))
+              max = algs->size[i].max;
+        }
+      /* If there aren't any usable algorithms, then recursing on
+         smaller sizes isn't going to find anything.  Just return the
+         simple byte-at-a-time copy loop.  */
+      if (!any_alg_usable_p)
+        {
+          /* Pick something reasonable.  */
+          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+            *dynamic_check = 128;
+          return loop_1_byte;
+        }
       if (max == -1)
 	max = 4096;
       alg = decide_alg (count, max / 2, memset, dynamic_check);
@@ -15130,7 +15165,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
 	*dynamic_check = max;
       return alg;
     }
-  return algs->unknown_size;
+  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
+#undef ALG_USABLE_P
 }
 
 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
@@ -15984,6 +16020,11 @@ ix86_expand_strlen (rtx out, rtx src, rt
   else
     {
       rtx unspec;
+
+      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
+      if (global_regs[0] || global_regs[2] || global_regs[5])
+        return false;
+
       scratch2 = gen_reg_rtx (Pmode);
       scratch3 = gen_reg_rtx (Pmode);
       scratch4 = force_reg (Pmode, constm1_rtx);

  reply	other threads:[~2007-10-12 16:13 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-04  6:34 Uros Bizjak
2007-10-04 20:55 ` Nathan Froyd
2007-10-05  5:59   ` Uros Bizjak
2007-10-12 16:13     ` Nathan Froyd [this message]
2007-10-12 16:40       ` H.J. Lu
2007-10-12 17:13         ` Nathan Froyd
2007-10-12 17:28           ` Uros Bizjak
2007-10-12 19:34           ` H.J. Lu
2007-10-12 19:40             ` Nathan Froyd
  -- strict thread matches above, loose matches on Subject: below --
2007-10-12 17:26 Uros Bizjak
2007-10-03 15:01 Nathan Froyd

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071012161246.GE11809@codesourcery.com \
    --to=froydnj@codesourcery.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jh@suse.cz \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).