public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Nathan Froyd <froydnj@codesourcery.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH,i386] fix PR 11001
Date: Thu, 04 Oct 2007 20:55:00 -0000	[thread overview]
Message-ID: <20071004205533.GG4491@codesourcery.com> (raw)
In-Reply-To: <5787cf470710032334y2aefe839x8981f4c84b9a2662@mail.gmail.com>

On Thu, Oct 04, 2007 at 08:34:11AM +0200, Uros Bizjak wrote:
> > +  /* Can't use this if the user has appropriated ecx, esi, or edi.  */
> > +  if ((alg == rep_prefix_1_byte
> > +       || alg == rep_prefix_4_byte
> > +       || alg == rep_prefix_8_byte)
> > +      && (global_regs[2] || global_regs[4] || global_regs[5]))
> > +    return 0;
> > +
> 
> I think that you should put this check into decide_alg(). There you
> can decide between copy loop and libcall, also taking into account
> optimize_size flag, as well as TARGET_INLINE_ALL_STRINGOPS and
> TARGET_INLINE_ALL_STRINGOPS_DYNAMICALLY.
> 
> Please note, that rep_prefix_* algorithms are used for larger blocks.
> Perhaps in your case, we should scan algorithms table backwards until
> we hit other non-rep_prefix algorithm. If we found none, we should
> default to a loop algorithm for optimize_size or to a libcall for
> other cases.

OK, I think this patch implements what you want:

- decide_alg now consults global_regs;

- loops are used if optimize_size and we cannot use rep prefixes;

- we don't run the heuristic if we cannot use rep prefixes and default
  to libcalls if we cannot find another strategy (loops or unrolled
  loops) for the size of block we are setting/copying--for situations
  where rep prefixes cannot be used.  I am unsure of what you were
  suggesting here--I read your first paragraph as saying that
  TARGET_INLINE_ALL_STRINGOPS* be respected whereas the second paragraph
  seems to say that it's ok to punt and choose libcalls if we can't do
  rep-based copying.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  Modified ChangeLog
entry below; entries for testcases not included.

-Nathan

	PR 11001
	gcc/
	* config/i386/i386.md (strmov): Check for esi and edi usage.
	* config/i386/i386.c (decide_alg): Check whether we can use a
	rep prefix and adjust algorithm choice accordingly.
	(ix86_expand_strlen): Check for eax, ecx, and edi usage.

Index: i386.c
===================================================================
--- i386.c	(revision 128981)
+++ i386.c	(working copy)
@@ -15057,21 +15057,32 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
 	    int *dynamic_check)
 {
   const struct stringop_algs * algs;
+  /* Algorithms using the rep prefix want at least edi and ecx;
+     additionally, memset wants eax and memcpy wants esi.  Don't
+     consider such algorithms if the user has appropriated those
+     registers for their own purposes.	*/
+  int rep_prefix_usable = !(global_regs[2] || global_regs[5]
+			    || (memset ? global_regs[0] : global_regs[4]));
+
+#define ALG_USABLE_P(alg) ((alg != rep_prefix_1_byte		\
+			    && alg != rep_prefix_4_byte		\
+			    && alg != rep_prefix_8_byte)	\
+			   || rep_prefix_usable)
 
   *dynamic_check = -1;
   if (memset)
     algs = &ix86_cost->memset[TARGET_64BIT != 0];
   else
     algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
-  if (stringop_alg != no_stringop)
+  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
     return stringop_alg;
   /* rep; movq or rep; movl is the smallest variant.  */
   else if (optimize_size)
     {
       if (!count || (count & 3))
-	return rep_prefix_1_byte;
+	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
       else
-	return rep_prefix_4_byte;
+	return rep_prefix_usable ? rep_prefix_4_byte : loop;
     }
   /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
    */
@@ -15083,27 +15094,34 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
       enum stringop_alg alg = libcall;
       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
 	{
-	  gcc_assert (algs->size[i].max);
+	  /* We get here if the algorithms that were not libcall-based
+	     were rep-prefix based and we are unable to use rep prefixes
+	     based on global register usage.  Break out of the loop and
+	     use the heuristic below.  */
+	  if (algs->size[i].max == 0)
+	    break;
 	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
 	    {
-	      if (algs->size[i].alg != libcall)
-		alg = algs->size[i].alg;
+	      enum stringop_alg candidate = algs->size[i].alg;
+
+	      if (candidate != libcall && ALG_USABLE_P (candidate))
+		alg = candidate;
 	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
-	         last non-libcall inline algorithm.  */
+		 last non-libcall inline algorithm.  */
 	      if (TARGET_INLINE_ALL_STRINGOPS)
 		{
 		  /* When the current size is best to be copied by a libcall,
-		     but we are still forced to inline, run the heuristic bellow
+		     but we are still forced to inline, run the heuristic below
 		     that will pick code for medium sized blocks.  */
 		  if (alg != libcall)
 		    return alg;
 		  break;
 		}
-	      else
-		return algs->size[i].alg;
+	      else if (ALG_USABLE_P (candidate))
+		return candidate;
 	    }
 	}
-      gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
+      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
     }
   /* When asked to inline the call anyway, try to pick meaningful choice.
      We look for maximal size of block that is faster to copy by hand and
@@ -15113,14 +15131,15 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
      If this turns out to be bad, we might simply specify the preferred
      choice in ix86_costs.  */
   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
-      && algs->unknown_size == libcall)
+      && algs->unknown_size == libcall && rep_prefix_usable)
     {
       int max = -1;
       enum stringop_alg alg;
       int i;
 
       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
-	if (algs->size[i].alg != libcall && algs->size[i].alg)
+	if (algs->size[i].alg != libcall && algs->size[i].alg
+	    && ALG_USABLE_P (algs->size[i].alg))
 	  max = algs->size[i].max;
       if (max == -1)
 	max = 4096;
@@ -15131,7 +15150,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
 	*dynamic_check = max;
       return alg;
     }
-  return algs->unknown_size;
+  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
+#undef ALG_USABLE_P
 }
 
 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
@@ -15985,6 +16005,11 @@ ix86_expand_strlen (rtx out, rtx src, rt
   else
     {
       rtx unspec;
+
+      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
+      if (global_regs[0] || global_regs[2] || global_regs[5])
+        return false;
+
       scratch2 = gen_reg_rtx (Pmode);
       scratch3 = gen_reg_rtx (Pmode);
       scratch4 = force_reg (Pmode, constm1_rtx);

  reply	other threads:[~2007-10-04 20:55 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-04  6:34 Uros Bizjak
2007-10-04 20:55 ` Nathan Froyd [this message]
2007-10-05  5:59   ` Uros Bizjak
2007-10-12 16:13     ` Nathan Froyd
2007-10-12 16:40       ` H.J. Lu
2007-10-12 17:13         ` Nathan Froyd
2007-10-12 17:28           ` Uros Bizjak
2007-10-12 19:34           ` H.J. Lu
2007-10-12 19:40             ` Nathan Froyd
  -- strict thread matches above, loose matches on Subject: below --
2007-10-12 17:26 Uros Bizjak
2007-10-03 15:01 Nathan Froyd

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071004205533.GG4491@codesourcery.com \
    --to=froydnj@codesourcery.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).