public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [RFC] Improve wordexp performance.
@ 2015-05-13 12:50 Ondřej Bílka
  2015-05-13 15:59 ` Joseph Myers
  2015-05-13 17:32 ` [RFC] " Carlos O'Donell
  0 siblings, 2 replies; 11+ messages in thread
From: Ondřej Bílka @ 2015-05-13 12:50 UTC (permalink / raw)
  To: libc-alpha

Hi, as I read Paul's wordexp patch I found that you use inefficient
idiom. Checking character membership with strchr is slow due to startup
cost. Much better is just use table lookup.

This patch does just that.

You could generalize this to strchr in bits/string2.h with constant
haystack but code would be ugly

It is related to improvement on my todo list namely precompute strpbrk
skip table which is done unless you have sse42 thats faster than lookup
table approach.

	* posix/wordexp.c: Precompute character tables.


diff --git a/posix/wordexp.c b/posix/wordexp.c
index e711d43..8939ca9 100644
--- a/posix/wordexp.c
+++ b/posix/wordexp.c
@@ -45,6 +45,35 @@
 #include <bits/libc-lock.h>
 #include <_itoa.h>
 
+
+static int
+in_charset (char c, uint32_t *charset)
+{
+  return charset[c / 32] & (1 << (c % 32));
+}
+
+static void 
+make_charset (char *a, uint32_t *b)
+{
+  int i;
+  for (i = 0; i < 8; i++)
+    b[i] = 0;   
+  while (*a)
+   {
+     b[*a / 32] |= 1 << (*a % 32);
+     a++;
+   }
+}
+
+struct ifss {
+  char *ifs;
+  char *ifs_white;
+  uint32_t ifs_table[8];
+  uint32_t ifs_white_table[8];
+};
+
+struct ifss nulifs;
+
 /* Undefine the following line for the production version.  */
 /* #define NDEBUG 1 */
 #include <assert.h>
@@ -63,18 +92,16 @@ extern char **__libc_argv attribute_hidden;
 /* Some forward declarations */
 static int parse_dollars (char **word, size_t *word_length, size_t *max_length,
 			  const char *words, size_t *offset, int flags,
-			  wordexp_t *pwordexp, const char *ifs,
-			  const char *ifs_white, int quoted)
+			  wordexp_t *pwordexp, struct ifss *ifsdata, int quoted)
      internal_function;
 static int parse_backtick (char **word, size_t *word_length,
 			   size_t *max_length, const char *words,
 			   size_t *offset, int flags, wordexp_t *pwordexp,
-			   const char *ifs, const char *ifs_white)
+			   struct ifss *ifsdata)
      internal_function;
 static int parse_dquote (char **word, size_t *word_length, size_t *max_length,
 			 const char *words, size_t *offset, int flags,
-			 wordexp_t *pwordexp, const char *ifs,
-			 const char *ifs_white)
+			 wordexp_t *pwordexp, struct ifss *ifsdata)
      internal_function;
 static int eval_expr (char *expr, long int *result) internal_function;
 
@@ -381,8 +408,7 @@ parse_tilde (char **word, size_t *word_length, size_t *max_length,
 static int
 internal_function
 do_parse_glob (const char *glob_word, char **word, size_t *word_length,
-	       size_t *max_length, wordexp_t *pwordexp, const char *ifs,
-	       const char *ifs_white)
+	       size_t *max_length, wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   int error;
   unsigned int match;
@@ -397,7 +423,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
       return WRDE_NOSPACE;
     }
 
-  if (ifs && !*ifs)
+  if (ifsdata->ifs && !*(ifsdata->ifs))
     {
       /* No field splitting allowed. */
       assert (globbuf.gl_pathv[0] != NULL);
@@ -414,7 +440,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
       return *word ? 0 : WRDE_NOSPACE;
     }
 
-  assert (ifs == NULL || *ifs != '\0');
+  assert (ifsdata->ifs == NULL || *(ifsdata->ifs) != '\0');
   if (*word != NULL)
     {
       free (*word);
@@ -439,7 +465,7 @@ static int
 internal_function
 parse_glob (char **word, size_t *word_length, size_t *max_length,
 	    const char *words, size_t *offset, int flags,
-	    wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+	    wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after a '*', a '[' or a '?'. */
   int error = WRDE_NOSPACE;
@@ -452,7 +478,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
   glob_list.we_offs = 0;
   for (; words[*offset] != '\0'; ++*offset)
     {
-      if (strchr (ifs, words[*offset]) != NULL)
+      if (in_charset (words[*offset], ifsdata->ifs_table))
 	/* Reached IFS */
 	break;
 
@@ -488,7 +514,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
       if (quoted != 1 && words[*offset] == '$')
 	{
 	  error = parse_dollars (word, word_length, max_length, words,
-				 offset, flags, &glob_list, ifs, ifs_white,
+				 offset, flags, &glob_list, ifsdata,
 				 quoted == 2);
 	  if (error)
 	    goto tidy_up;
@@ -523,7 +549,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
   *word = w_newword (word_length, max_length);
   for (i = 0; error == 0 && i < glob_list.we_wordc; i++)
     error = do_parse_glob (glob_list.we_wordv[i], word, word_length,
-			   max_length, pwordexp, ifs, ifs_white);
+			   max_length, pwordexp, ifsdata);
 
   /* Now tidy up */
 tidy_up:
@@ -685,7 +711,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
 	{
 	case '$':
 	  error = parse_dollars (&expr, &expr_length, &expr_maxlen,
-				 words, offset, flags, NULL, NULL, NULL, 1);
+				 words, offset, flags, NULL, &nulifs, 1);
 	  /* The ``1'' here is to tell parse_dollars not to
 	   * split the fields.
 	   */
@@ -699,7 +725,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  (*offset)++;
 	  error = parse_backtick (&expr, &expr_length, &expr_maxlen,
-				  words, offset, flags, NULL, NULL, NULL);
+				  words, offset, flags, NULL, &nulifs);
 	  /* The first NULL here is to tell parse_backtick not to
 	   * split the fields.
 	   */
@@ -884,8 +910,7 @@ exec_comm_child (char *comm, int *fildes, int showerr, int noexec)
 static int
 internal_function
 exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
-	   int flags, wordexp_t *pwordexp, const char *ifs,
-	   const char *ifs_white)
+	   int flags, wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   int fildes[2];
 #define bufsize 128
@@ -1010,10 +1035,10 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
 
 	  for (i = 0; i < buflen; ++i)
 	    {
-	      if (strchr (ifs, buffer[i]) != NULL)
+	      if (in_charset (buffer[i], ifsdata->ifs_table))
 		{
 		  /* Current character is IFS */
-		  if (strchr (ifs_white, buffer[i]) == NULL)
+		  if (!in_charset (buffer[i], ifsdata->ifs_white_table))
 		    {
 		      /* Current character is IFS but not whitespace */
 		      if (copying == 2)
@@ -1142,7 +1167,7 @@ static int
 internal_function
 parse_comm (char **word, size_t *word_length, size_t *max_length,
 	    const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
-	    const char *ifs, const char *ifs_white)
+	    struct ifss *ifsdata)
 {
   /* We are poised just after "$(" */
   int paren_depth = 1;
@@ -1191,7 +1216,7 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
 #endif
 
 		  error = exec_comm (comm, word, word_length, max_length,
-				     flags, pwordexp, ifs, ifs_white);
+				     flags, pwordexp, ifsdata);
 
 #ifdef __libc_ptf_call
 		  __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
@@ -1222,14 +1247,12 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
   return WRDE_SYNTAX;
 }
 
-#define CHAR_IN_SET(ch, char_set) \
-  (memchr (char_set "", ch, sizeof (char_set) - 1) != NULL)
 
 static int
 internal_function
 parse_param (char **word, size_t *word_length, size_t *max_length,
 	     const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
-	     const char *ifs, const char *ifs_white, int quoted)
+	     struct ifss *ifsdata, int quoted)
 {
   /* We are poised just after "$" */
   enum action
@@ -1269,6 +1292,8 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
   if (brace)
     ++*offset;
 
+  uint32_t set1[8] = {0x0, 0x410, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0}; /* *@$ */
+
   /* First collect the parameter name. */
 
   if (words[*offset] == '#')
@@ -1306,7 +1331,7 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
 	}
       while (isdigit(words[++*offset]));
     }
-  else if (CHAR_IN_SET (words[*offset], "*@$"))
+  else if (in_charset (words[*offset], set1))
     {
       /* Special parameter. */
       special = 1;
@@ -1349,8 +1374,9 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
 	    }
 	  break;
 
-	case ':':
-	  if (!CHAR_IN_SET (words[1 + *offset], "-=?+"))
+	case ':':;
+          uint32_t set2[8] = {0x0, 0xa0002800, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* -=?+ */
+	  if (!in_charset (words[1 + *offset], set2))
 	    goto syntax;
 
 	  colon_seen = 1;
@@ -1648,7 +1674,7 @@ envsubst:
 		case '$':
 		  offset = 0;
 		  error = parse_dollars (&expanded, &exp_len, &exp_maxl, p,
-					 &offset, flags, NULL, NULL, NULL, 1);
+					 &offset, flags, NULL, &nulifs, 1);
 		  if (error)
 		    {
 		      if (free_value)
@@ -1991,22 +2017,22 @@ envsubst:
 	    }
 
 	  /* Skip IFS whitespace before the field */
-	  field_begin += strspn (field_begin, ifs_white);
+	  field_begin += strspn (field_begin, ifsdata->ifs_white);
 
 	  if (!seen_nonws_ifs && *field_begin == 0)
 	    /* Nothing but whitespace */
 	    break;
 
 	  /* Search for the end of the field */
-	  field_end = field_begin + strcspn (field_begin, ifs);
+	  field_end = field_begin + strcspn (field_begin, ifsdata->ifs);
 
 	  /* Set up pointer to the character after end of field and
 	     skip whitespace IFS after it. */
-	  next_field = field_end + strspn (field_end, ifs_white);
+	  next_field = field_end + strspn (field_end, ifsdata->ifs_white);
 
 	  /* Skip at most one non-whitespace IFS character after the field */
 	  seen_nonws_ifs = 0;
-	  if (*next_field && strchr (ifs, *next_field))
+	  if (*next_field && in_charset (*next_field, ifsdata->ifs_table))
 	    {
 	      seen_nonws_ifs = 1;
 	      next_field++;
@@ -2052,13 +2078,12 @@ do_error:
   return error;
 }
 
-#undef CHAR_IN_SET
 
 static int
 internal_function
 parse_dollars (char **word, size_t *word_length, size_t *max_length,
 	       const char *words, size_t *offset, int flags,
-	       wordexp_t *pwordexp, const char *ifs, const char *ifs_white,
+	       wordexp_t *pwordexp, struct ifss *ifsdata,
 	       int quoted)
 {
   /* We are poised _at_ "$" */
@@ -2097,7 +2122,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
 
       (*offset) += 2;
       return parse_comm (word, word_length, max_length, words, offset, flags,
-			 quoted? NULL : pwordexp, ifs, ifs_white);
+			 quoted? NULL : pwordexp, ifsdata);
 
     case '[':
       (*offset) += 2;
@@ -2109,7 +2134,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
     default:
       ++(*offset);	/* parse_param needs to know if "{" is there */
       return parse_param (word, word_length, max_length, words, offset, flags,
-			   pwordexp, ifs, ifs_white, quoted);
+			   pwordexp, ifsdata, quoted);
     }
 }
 
@@ -2117,7 +2142,7 @@ static int
 internal_function
 parse_backtick (char **word, size_t *word_length, size_t *max_length,
 		const char *words, size_t *offset, int flags,
-		wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+		wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after "`" */
   int error;
@@ -2133,7 +2158,7 @@ parse_backtick (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  /* Go -- give the script to the shell */
 	  error = exec_comm (comm, word, word_length, max_length, flags,
-			     pwordexp, ifs, ifs_white);
+			     pwordexp, ifsdata);
 	  free (comm);
 	  return error;
 
@@ -2181,7 +2206,7 @@ static int
 internal_function
 parse_dquote (char **word, size_t *word_length, size_t *max_length,
 	      const char *words, size_t *offset, int flags,
-	      wordexp_t *pwordexp, const char * ifs, const char * ifs_white)
+	      wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after a double-quote */
   int error;
@@ -2195,7 +2220,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
 
 	case '$':
 	  error = parse_dollars (word, word_length, max_length, words, offset,
-				 flags, pwordexp, ifs, ifs_white, 1);
+				 flags, pwordexp, ifsdata, 1);
 	  /* The ``1'' here is to tell parse_dollars not to
 	   * split the fields.  It may need to, however ("$@").
 	   */
@@ -2207,7 +2232,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  ++(*offset);
 	  error = parse_backtick (word, word_length, max_length, words,
-				  offset, flags, NULL, NULL, NULL);
+				  offset, flags, NULL, &nulifs);
 	  /* The first NULL here is to tell parse_backtick not to
 	   * split the fields.
 	   */
@@ -2340,6 +2365,13 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       *whch = '\0';
     }
 
+  struct ifss ifsdata;
+  ifsdata.ifs = ifs;
+  ifsdata.ifs_white = ifs_white;
+  make_charset (ifs, ifsdata.ifs_table);
+  make_charset (ifs_white, ifsdata.ifs_white_table);
+
+
   for (words_offset = 0 ; words[words_offset] ; ++words_offset)
     switch (words[words_offset])
       {
@@ -2354,7 +2386,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
 
       case '$':
 	error = parse_dollars (&word, &word_length, &max_length, words,
-			       &words_offset, flags, pwordexp, ifs, ifs_white,
+			       &words_offset, flags, pwordexp, &ifsdata,
 			       0);
 
 	if (error)
@@ -2365,8 +2397,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '`':
 	++words_offset;
 	error = parse_backtick (&word, &word_length, &max_length, words,
-				&words_offset, flags, pwordexp, ifs,
-				ifs_white);
+				&words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
@@ -2376,7 +2407,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '"':
 	++words_offset;
 	error = parse_dquote (&word, &word_length, &max_length, words,
-			      &words_offset, flags, pwordexp, ifs, ifs_white);
+			      &words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
@@ -2422,21 +2453,24 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '[':
       case '?':
 	error = parse_glob (&word, &word_length, &max_length, words,
-			    &words_offset, flags, pwordexp, ifs, ifs_white);
+			    &words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
 
 	break;
 
-      default:
+      default:;
 	/* Is it a word separator? */
-	if (strchr (" \t", words[words_offset]) == NULL)
-	  {
-	    char ch = words[words_offset];
+	char ch = words[words_offset];
 
+        uint32_t set3[8] = {0x200, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* \t */
+	if (!in_charset (ch, set3))
+	  {
 	    /* Not a word separator -- but is it a valid word char? */
-	    if (strchr ("\n|&;<>(){}", ch))
+            uint32_t set4[8] = {0x400, 0x58000340, 0x0, 0x38000000, 0x0, 0x0, 
+                                0x0, 0x0}; /* \n|&;<>(){} */
+	    if (in_charset (ch, set4))
 	      {
 		/* Fail */
 		error = WRDE_BADCHAR;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] Improve wordexp performance.
  2015-05-13 12:50 [RFC] Improve wordexp performance Ondřej Bílka
@ 2015-05-13 15:59 ` Joseph Myers
  2015-05-13 17:27   ` [RFC v2] " Ondřej Bílka
  2015-05-13 17:32 ` [RFC] " Carlos O'Donell
  1 sibling, 1 reply; 11+ messages in thread
From: Joseph Myers @ 2015-05-13 15:59 UTC (permalink / raw)
  To: Ondřej Bílka; +Cc: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 594 bytes --]

On Wed, 13 May 2015, Ondøej Bílka wrote:

> +static int
> +in_charset (char c, uint32_t *charset)
> +{
> +  return charset[c / 32] & (1 << (c % 32));
> +}
> +
> +static void 
> +make_charset (char *a, uint32_t *b)
> +{
> +  int i;
> +  for (i = 0; i < 8; i++)
> +    b[i] = 0;   
> +  while (*a)
> +   {
> +     b[*a / 32] |= 1 << (*a % 32);
> +     a++;
> +   }
> +}

Anything like this using possibly signed char seems suspect; I'd expect it 
to use unsigned char unless there is a clearly documented reason why 
negative values aren't possible.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC v2] Improve wordexp performance.
  2015-05-13 15:59 ` Joseph Myers
@ 2015-05-13 17:27   ` Ondřej Bílka
  0 siblings, 0 replies; 11+ messages in thread
From: Ondřej Bílka @ 2015-05-13 17:27 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-alpha

On Wed, May 13, 2015 at 03:59:20PM +0000, Joseph Myers wrote:
> On Wed, 13 May 2015, Ondřej Bílka wrote:
> Anything like this using possibly signed char seems suspect; I'd expect it 
> to use unsigned char unless there is a clearly documented reason why 
> negative values aren't possible.
> 
Sure, was bit sleepy and forgot about that. Here is v2 which intended
cast to unsigned.


diff --git a/posix/wordexp.c b/posix/wordexp.c
index e711d43..40021c5 100644
--- a/posix/wordexp.c
+++ b/posix/wordexp.c
@@ -45,6 +45,37 @@
 #include <bits/libc-lock.h>
 #include <_itoa.h>
 
+
+static int
+in_charset (char _c, uint32_t *charset)
+{
+  unsigned char c = _c;
+  return charset[c / 32] & (1 << (c % 32));
+}
+
+static void 
+make_charset (char *_a, uint32_t *b)
+{
+  unsigned char *a = (unsigned char *) _a;
+  int i;
+  for (i = 0; i < 8; i++)
+    b[i] = 0;   
+  while (*a)
+   {
+     b[*a / 32] |= 1 << (*a % 32);
+     a++;
+   }
+}
+
+struct ifss {
+  char *ifs;
+  char *ifs_white;
+  uint32_t ifs_table[8];
+  uint32_t ifs_white_table[8];
+};
+
+struct ifss nulifs;
+
 /* Undefine the following line for the production version.  */
 /* #define NDEBUG 1 */
 #include <assert.h>
@@ -63,18 +94,16 @@ extern char **__libc_argv attribute_hidden;
 /* Some forward declarations */
 static int parse_dollars (char **word, size_t *word_length, size_t *max_length,
 			  const char *words, size_t *offset, int flags,
-			  wordexp_t *pwordexp, const char *ifs,
-			  const char *ifs_white, int quoted)
+			  wordexp_t *pwordexp, struct ifss *ifsdata, int quoted)
      internal_function;
 static int parse_backtick (char **word, size_t *word_length,
 			   size_t *max_length, const char *words,
 			   size_t *offset, int flags, wordexp_t *pwordexp,
-			   const char *ifs, const char *ifs_white)
+			   struct ifss *ifsdata)
      internal_function;
 static int parse_dquote (char **word, size_t *word_length, size_t *max_length,
 			 const char *words, size_t *offset, int flags,
-			 wordexp_t *pwordexp, const char *ifs,
-			 const char *ifs_white)
+			 wordexp_t *pwordexp, struct ifss *ifsdata)
      internal_function;
 static int eval_expr (char *expr, long int *result) internal_function;
 
@@ -381,8 +410,7 @@ parse_tilde (char **word, size_t *word_length, size_t *max_length,
 static int
 internal_function
 do_parse_glob (const char *glob_word, char **word, size_t *word_length,
-	       size_t *max_length, wordexp_t *pwordexp, const char *ifs,
-	       const char *ifs_white)
+	       size_t *max_length, wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   int error;
   unsigned int match;
@@ -397,7 +425,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
       return WRDE_NOSPACE;
     }
 
-  if (ifs && !*ifs)
+  if (ifsdata->ifs && !*(ifsdata->ifs))
     {
       /* No field splitting allowed. */
       assert (globbuf.gl_pathv[0] != NULL);
@@ -414,7 +442,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length,
       return *word ? 0 : WRDE_NOSPACE;
     }
 
-  assert (ifs == NULL || *ifs != '\0');
+  assert (ifsdata->ifs == NULL || *(ifsdata->ifs) != '\0');
   if (*word != NULL)
     {
       free (*word);
@@ -439,7 +467,7 @@ static int
 internal_function
 parse_glob (char **word, size_t *word_length, size_t *max_length,
 	    const char *words, size_t *offset, int flags,
-	    wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+	    wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after a '*', a '[' or a '?'. */
   int error = WRDE_NOSPACE;
@@ -452,7 +480,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
   glob_list.we_offs = 0;
   for (; words[*offset] != '\0'; ++*offset)
     {
-      if (strchr (ifs, words[*offset]) != NULL)
+      if (in_charset (words[*offset], ifsdata->ifs_table))
 	/* Reached IFS */
 	break;
 
@@ -488,7 +516,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
       if (quoted != 1 && words[*offset] == '$')
 	{
 	  error = parse_dollars (word, word_length, max_length, words,
-				 offset, flags, &glob_list, ifs, ifs_white,
+				 offset, flags, &glob_list, ifsdata,
 				 quoted == 2);
 	  if (error)
 	    goto tidy_up;
@@ -523,7 +551,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length,
   *word = w_newword (word_length, max_length);
   for (i = 0; error == 0 && i < glob_list.we_wordc; i++)
     error = do_parse_glob (glob_list.we_wordv[i], word, word_length,
-			   max_length, pwordexp, ifs, ifs_white);
+			   max_length, pwordexp, ifsdata);
 
   /* Now tidy up */
 tidy_up:
@@ -685,7 +713,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
 	{
 	case '$':
 	  error = parse_dollars (&expr, &expr_length, &expr_maxlen,
-				 words, offset, flags, NULL, NULL, NULL, 1);
+				 words, offset, flags, NULL, &nulifs, 1);
 	  /* The ``1'' here is to tell parse_dollars not to
 	   * split the fields.
 	   */
@@ -699,7 +727,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  (*offset)++;
 	  error = parse_backtick (&expr, &expr_length, &expr_maxlen,
-				  words, offset, flags, NULL, NULL, NULL);
+				  words, offset, flags, NULL, &nulifs);
 	  /* The first NULL here is to tell parse_backtick not to
 	   * split the fields.
 	   */
@@ -884,8 +912,7 @@ exec_comm_child (char *comm, int *fildes, int showerr, int noexec)
 static int
 internal_function
 exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
-	   int flags, wordexp_t *pwordexp, const char *ifs,
-	   const char *ifs_white)
+	   int flags, wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   int fildes[2];
 #define bufsize 128
@@ -1010,10 +1037,10 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
 
 	  for (i = 0; i < buflen; ++i)
 	    {
-	      if (strchr (ifs, buffer[i]) != NULL)
+	      if (in_charset (buffer[i], ifsdata->ifs_table))
 		{
 		  /* Current character is IFS */
-		  if (strchr (ifs_white, buffer[i]) == NULL)
+		  if (!in_charset (buffer[i], ifsdata->ifs_white_table))
 		    {
 		      /* Current character is IFS but not whitespace */
 		      if (copying == 2)
@@ -1142,7 +1169,7 @@ static int
 internal_function
 parse_comm (char **word, size_t *word_length, size_t *max_length,
 	    const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
-	    const char *ifs, const char *ifs_white)
+	    struct ifss *ifsdata)
 {
   /* We are poised just after "$(" */
   int paren_depth = 1;
@@ -1191,7 +1218,7 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
 #endif
 
 		  error = exec_comm (comm, word, word_length, max_length,
-				     flags, pwordexp, ifs, ifs_white);
+				     flags, pwordexp, ifsdata);
 
 #ifdef __libc_ptf_call
 		  __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
@@ -1222,14 +1249,12 @@ parse_comm (char **word, size_t *word_length, size_t *max_length,
   return WRDE_SYNTAX;
 }
 
-#define CHAR_IN_SET(ch, char_set) \
-  (memchr (char_set "", ch, sizeof (char_set) - 1) != NULL)
 
 static int
 internal_function
 parse_param (char **word, size_t *word_length, size_t *max_length,
 	     const char *words, size_t *offset, int flags, wordexp_t *pwordexp,
-	     const char *ifs, const char *ifs_white, int quoted)
+	     struct ifss *ifsdata, int quoted)
 {
   /* We are poised just after "$" */
   enum action
@@ -1269,6 +1294,8 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
   if (brace)
     ++*offset;
 
+  uint32_t set1[8] = {0x0, 0x410, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0}; /* *@$ */
+
   /* First collect the parameter name. */
 
   if (words[*offset] == '#')
@@ -1306,7 +1333,7 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
 	}
       while (isdigit(words[++*offset]));
     }
-  else if (CHAR_IN_SET (words[*offset], "*@$"))
+  else if (in_charset (words[*offset], set1))
     {
       /* Special parameter. */
       special = 1;
@@ -1349,8 +1376,9 @@ parse_param (char **word, size_t *word_length, size_t *max_length,
 	    }
 	  break;
 
-	case ':':
-	  if (!CHAR_IN_SET (words[1 + *offset], "-=?+"))
+	case ':':;
+          uint32_t set2[8] = {0x0, 0xa0002800, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* -=?+ */
+	  if (!in_charset (words[1 + *offset], set2))
 	    goto syntax;
 
 	  colon_seen = 1;
@@ -1648,7 +1676,7 @@ envsubst:
 		case '$':
 		  offset = 0;
 		  error = parse_dollars (&expanded, &exp_len, &exp_maxl, p,
-					 &offset, flags, NULL, NULL, NULL, 1);
+					 &offset, flags, NULL, &nulifs, 1);
 		  if (error)
 		    {
 		      if (free_value)
@@ -1991,22 +2019,22 @@ envsubst:
 	    }
 
 	  /* Skip IFS whitespace before the field */
-	  field_begin += strspn (field_begin, ifs_white);
+	  field_begin += strspn (field_begin, ifsdata->ifs_white);
 
 	  if (!seen_nonws_ifs && *field_begin == 0)
 	    /* Nothing but whitespace */
 	    break;
 
 	  /* Search for the end of the field */
-	  field_end = field_begin + strcspn (field_begin, ifs);
+	  field_end = field_begin + strcspn (field_begin, ifsdata->ifs);
 
 	  /* Set up pointer to the character after end of field and
 	     skip whitespace IFS after it. */
-	  next_field = field_end + strspn (field_end, ifs_white);
+	  next_field = field_end + strspn (field_end, ifsdata->ifs_white);
 
 	  /* Skip at most one non-whitespace IFS character after the field */
 	  seen_nonws_ifs = 0;
-	  if (*next_field && strchr (ifs, *next_field))
+	  if (*next_field && in_charset (*next_field, ifsdata->ifs_table))
 	    {
 	      seen_nonws_ifs = 1;
 	      next_field++;
@@ -2052,13 +2080,12 @@ do_error:
   return error;
 }
 
-#undef CHAR_IN_SET
 
 static int
 internal_function
 parse_dollars (char **word, size_t *word_length, size_t *max_length,
 	       const char *words, size_t *offset, int flags,
-	       wordexp_t *pwordexp, const char *ifs, const char *ifs_white,
+	       wordexp_t *pwordexp, struct ifss *ifsdata,
 	       int quoted)
 {
   /* We are poised _at_ "$" */
@@ -2097,7 +2124,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
 
       (*offset) += 2;
       return parse_comm (word, word_length, max_length, words, offset, flags,
-			 quoted? NULL : pwordexp, ifs, ifs_white);
+			 quoted? NULL : pwordexp, ifsdata);
 
     case '[':
       (*offset) += 2;
@@ -2109,7 +2136,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length,
     default:
       ++(*offset);	/* parse_param needs to know if "{" is there */
       return parse_param (word, word_length, max_length, words, offset, flags,
-			   pwordexp, ifs, ifs_white, quoted);
+			   pwordexp, ifsdata, quoted);
     }
 }
 
@@ -2117,7 +2144,7 @@ static int
 internal_function
 parse_backtick (char **word, size_t *word_length, size_t *max_length,
 		const char *words, size_t *offset, int flags,
-		wordexp_t *pwordexp, const char *ifs, const char *ifs_white)
+		wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after "`" */
   int error;
@@ -2133,7 +2160,7 @@ parse_backtick (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  /* Go -- give the script to the shell */
 	  error = exec_comm (comm, word, word_length, max_length, flags,
-			     pwordexp, ifs, ifs_white);
+			     pwordexp, ifsdata);
 	  free (comm);
 	  return error;
 
@@ -2181,7 +2208,7 @@ static int
 internal_function
 parse_dquote (char **word, size_t *word_length, size_t *max_length,
 	      const char *words, size_t *offset, int flags,
-	      wordexp_t *pwordexp, const char * ifs, const char * ifs_white)
+	      wordexp_t *pwordexp, struct ifss *ifsdata)
 {
   /* We are poised just after a double-quote */
   int error;
@@ -2195,7 +2222,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
 
 	case '$':
 	  error = parse_dollars (word, word_length, max_length, words, offset,
-				 flags, pwordexp, ifs, ifs_white, 1);
+				 flags, pwordexp, ifsdata, 1);
 	  /* The ``1'' here is to tell parse_dollars not to
 	   * split the fields.  It may need to, however ("$@").
 	   */
@@ -2207,7 +2234,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length,
 	case '`':
 	  ++(*offset);
 	  error = parse_backtick (word, word_length, max_length, words,
-				  offset, flags, NULL, NULL, NULL);
+				  offset, flags, NULL, &nulifs);
 	  /* The first NULL here is to tell parse_backtick not to
 	   * split the fields.
 	   */
@@ -2340,6 +2367,13 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       *whch = '\0';
     }
 
+  struct ifss ifsdata;
+  ifsdata.ifs = ifs;
+  ifsdata.ifs_white = ifs_white;
+  make_charset (ifs, ifsdata.ifs_table);
+  make_charset (ifs_white, ifsdata.ifs_white_table);
+
+
   for (words_offset = 0 ; words[words_offset] ; ++words_offset)
     switch (words[words_offset])
       {
@@ -2354,7 +2388,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
 
       case '$':
 	error = parse_dollars (&word, &word_length, &max_length, words,
-			       &words_offset, flags, pwordexp, ifs, ifs_white,
+			       &words_offset, flags, pwordexp, &ifsdata,
 			       0);
 
 	if (error)
@@ -2365,8 +2399,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '`':
 	++words_offset;
 	error = parse_backtick (&word, &word_length, &max_length, words,
-				&words_offset, flags, pwordexp, ifs,
-				ifs_white);
+				&words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
@@ -2376,7 +2409,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '"':
 	++words_offset;
 	error = parse_dquote (&word, &word_length, &max_length, words,
-			      &words_offset, flags, pwordexp, ifs, ifs_white);
+			      &words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
@@ -2422,21 +2455,24 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
       case '[':
       case '?':
 	error = parse_glob (&word, &word_length, &max_length, words,
-			    &words_offset, flags, pwordexp, ifs, ifs_white);
+			    &words_offset, flags, pwordexp, &ifsdata);
 
 	if (error)
 	  goto do_error;
 
 	break;
 
-      default:
+      default:;
 	/* Is it a word separator? */
-	if (strchr (" \t", words[words_offset]) == NULL)
-	  {
-	    char ch = words[words_offset];
+	char ch = words[words_offset];
 
+        uint32_t set3[8] = {0x200, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* \t */
+	if (!in_charset (ch, set3))
+	  {
 	    /* Not a word separator -- but is it a valid word char? */
-	    if (strchr ("\n|&;<>(){}", ch))
+            uint32_t set4[8] = {0x400, 0x58000340, 0x0, 0x38000000, 0x0, 0x0, 
+                                0x0, 0x0}; /* \n|&;<>(){} */
+	    if (in_charset (ch, set4))
 	      {
 		/* Fail */
 		error = WRDE_BADCHAR;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] Improve wordexp performance.
  2015-05-13 12:50 [RFC] Improve wordexp performance Ondřej Bílka
  2015-05-13 15:59 ` Joseph Myers
@ 2015-05-13 17:32 ` Carlos O'Donell
  2015-05-13 18:36   ` Ondřej Bílka
  1 sibling, 1 reply; 11+ messages in thread
From: Carlos O'Donell @ 2015-05-13 17:32 UTC (permalink / raw)
  To: Ondřej Bílka, libc-alpha

On 05/13/2015 08:49 AM, Ondřej Bílka wrote:
> Hi, as I read Paul's wordexp patch I found that you use inefficient
> idiom. Checking character membership with strchr is slow due to startup
> cost. Much better is just use table lookup.

How did you test it was faster?

Could you please add a wordexp microbenchmark to show the gains?

Cheers,
Carlos.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] Improve wordexp performance.
  2015-05-13 17:32 ` [RFC] " Carlos O'Donell
@ 2015-05-13 18:36   ` Ondřej Bílka
  2015-05-13 19:54     ` Carlos O'Donell
  0 siblings, 1 reply; 11+ messages in thread
From: Ondřej Bílka @ 2015-05-13 18:36 UTC (permalink / raw)
  To: Carlos O'Donell; +Cc: libc-alpha

On Wed, May 13, 2015 at 01:32:00PM -0400, Carlos O'Donell wrote:
> On 05/13/2015 08:49 AM, Ondřej Bílka wrote:
> > Hi, as I read Paul's wordexp patch I found that you use inefficient
> > idiom. Checking character membership with strchr is slow due to startup
> > cost. Much better is just use table lookup.
> 
> How did you test it was faster?
>
You don't need to look at barometer to see if its raining. It does
single memory access which takes around 5-6 cycles is faster than strchr that takes ~30 cycles 
and cannot be faster as it also needs to do a memory access.

 
> Could you please add a wordexp microbenchmark to show the gains?
> 
I could. Its easy to make microbenchmark that shows improved performance.
Also its easy to make microbenchmark that makes it a regression. Just
use 128 byte IFS env. variable and call wordexp("x", foo, 0)
I could also make microbenchmark that is inconclusive as performance
difference is lost in syscall overhead of finding filenames by glob.

So I wont make microbenchmark as it would be useless exercise for pretty
obvious case.

You would need runtime profiling to get something useful.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] Improve wordexp performance.
  2015-05-13 18:36   ` Ondřej Bílka
@ 2015-05-13 19:54     ` Carlos O'Donell
  2015-06-06 12:42       ` Wordexp benchtest: good, bad and unreliable Ondřej Bílka
  0 siblings, 1 reply; 11+ messages in thread
From: Carlos O'Donell @ 2015-05-13 19:54 UTC (permalink / raw)
  To: Ondřej Bílka; +Cc: libc-alpha

On 05/13/2015 02:35 PM, Ondřej Bílka wrote:
> On Wed, May 13, 2015 at 01:32:00PM -0400, Carlos O'Donell wrote:
>> On 05/13/2015 08:49 AM, Ondřej Bílka wrote:
>>> Hi, as I read Paul's wordexp patch I found that you use inefficient
>>> idiom. Checking character membership with strchr is slow due to startup
>>> cost. Much better is just use table lookup.
>>
>> How did you test it was faster?
>>
> You don't need to look at barometer to see if its raining. It does
> single memory access which takes around 5-6 cycles is faster than strchr that takes ~30 cycles 
> and cannot be faster as it also needs to do a memory access.
> 
>  
>> Could you please add a wordexp microbenchmark to show the gains?
>>
> I could. Its easy to make microbenchmark that shows improved performance.
> Also its easy to make microbenchmark that makes it a regression. Just
> use 128 byte IFS env. variable and call wordexp("x", foo, 0)
> I could also make microbenchmark that is inconclusive as performance
> difference is lost in syscall overhead of finding filenames by glob.

And those would all represent various workloads which we care about?

I'm fine having 3 different wordexp microbenchmarks.

> So I wont make microbenchmark as it would be useless exercise for pretty
> obvious case.

OK.
 
> You would need runtime profiling to get something useful.

I don't agree with that. I think a microbenchmark of wordexp would be useful
as a regression test for this case.

Cheers,
Carlos.
 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Wordexp benchtest: good, bad and unreliable.
  2015-05-13 19:54     ` Carlos O'Donell
@ 2015-06-06 12:42       ` Ondřej Bílka
  2015-06-16 17:43         ` [PING] " Ondřej Bílka
  0 siblings, 1 reply; 11+ messages in thread
From: Ondřej Bílka @ 2015-06-06 12:42 UTC (permalink / raw)
  To: Carlos O'Donell; +Cc: libc-alpha

On Wed, May 13, 2015 at 02:55:28PM -0400, Carlos O'Donell wrote:
> On 05/13/2015 02:35 PM, Ondřej Bílka wrote:
> > On Wed, May 13, 2015 at 01:32:00PM -0400, Carlos O'Donell wrote:
> >> On 05/13/2015 08:49 AM, Ondřej Bílka wrote:
> >>> Hi, as I read Paul's wordexp patch I found that you use inefficient
> >>> idiom. Checking character membership with strchr is slow due to startup
> >>> cost. Much better is just use table lookup.
> >>
> >> How did you test it was faster?
> >>
> > You don't need to look at barometer to see if its raining. It does
> > single memory access which takes around 5-6 cycles is faster than strchr that takes ~30 cycles 
> > and cannot be faster as it also needs to do a memory access.
> > 
> >  
> >> Could you please add a wordexp microbenchmark to show the gains?
> >>
> > I could. Its easy to make microbenchmark that shows improved performance.
> > Also its easy to make microbenchmark that makes it a regression. Just
> > use 128 byte IFS env. variable and call wordexp("x", foo, 0)
> > I could also make microbenchmark that is inconclusive as performance
> > difference is lost in syscall overhead of finding filenames by glob.
> 
> And those would all represent various workloads which we care about?
> 
> I'm fine having 3 different wordexp microbenchmarks.
> 
> > So I wont make microbenchmark as it would be useless exercise for pretty
> > obvious case.
> 
> OK.
>  
> > You would need runtime profiling to get something useful.
> 
> I don't agree with that. I think a microbenchmark of wordexp would be useful
> as a regression test for this case.
> 
> Cheers,
> Carlos.
>  

As I mentioned before for wordexp performance depends on too many
parameters. So here is microbenchmark that shows some improvements and
some regressions. There is lot of noise. I ran these three times and on
one * and ffffffffoo pattern become twice slower for no reason.

So how do you decide with conflicted data like this?


old implementation
                       	wordexp
Pattern foo flags 0:	1081.77
Pattern ffoo flags 0:	621.516
Pattern ffffoo flags 0:	697.562
Pattern ffffffffoo flags 0:	867.922
Pattern ffffffffffffffffoo flags 0:	1110.75
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1567.92
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2541.03
Pattern * flags 0:	141070
Pattern foo flags 0:	1061.08
Pattern ffoo flags 0:	1190.48
Pattern ffffoo flags 0:	1146.83
Pattern ffffffffoo flags 0:	1407.77
Pattern ffffffffffffffffoo flags 0:	1734.33
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2143.02
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3100.41
Pattern * flags 0:	205510


                       	wordexp
Pattern foo flags 0:	1081.84
Pattern ffoo flags 0:	672.422
Pattern ffffoo flags 0:	704.828
Pattern ffffffffoo flags 0:	875.188
Pattern ffffffffffffffffoo flags 0:	1089.7
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1594.03
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2575.28
Pattern * flags 0:	144160
Pattern foo flags 0:	1062.45
Pattern ffoo flags 0:	1099.72
Pattern ffffoo flags 0:	1153.95
Pattern ffffffffoo flags 0:	1391.45
Pattern ffffffffffffffffoo flags 0:	1820.36
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2129
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3171.5
Pattern * flags 0:	208475


                       	wordexp
Pattern foo flags 0:	1086.2
Pattern ffoo flags 0:	615.078
Pattern ffffoo flags 0:	688.594
Pattern ffffffffoo flags 0:	851.984
Pattern ffffffffffffffffoo flags 0:	1085.48
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1562.95
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2540.08
Pattern * flags 0:	287636
Pattern foo flags 0:	1068.11
Pattern ffoo flags 0:	1092.5
Pattern ffffoo flags 0:	1143.7
Pattern ffffffffoo flags 0:	1380.66
Pattern ffffffffffffffffoo flags 0:	1763.88
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2165.72
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3095.41
Pattern * flags 0:	207373

new


                       	wordexp
Pattern foo flags 0:	1161.95
Pattern ffoo flags 0:	631.016
Pattern ffffoo flags 0:	714.797
Pattern ffffffffoo flags 0:	2209.84
Pattern ffffffffffffffffoo flags 0:	1024.17
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1408.22
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2223.39
Pattern * flags 0:	146925
Pattern foo flags 0:	1660.66
Pattern ffoo flags 0:	1673.34
Pattern ffffoo flags 0:	1730.23
Pattern ffffffffoo flags 0:	2030.42
Pattern ffffffffffffffffoo flags 0:	2245.59
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2512.95
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3312.95
Pattern * flags 0:	210492


                       	wordexp
Pattern foo flags 0:	1184.19
Pattern ffoo flags 0:	634.062
Pattern ffffoo flags 0:	683.688
Pattern ffffffffoo flags 0:	826.891
Pattern ffffffffffffffffoo flags 0:	1020.27
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1405.89
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2185.44
Pattern * flags 0:	140257
Pattern foo flags 0:	1649.77
Pattern ffoo flags 0:	1668.39
Pattern ffffoo flags 0:	1722.23
Pattern ffffffffoo flags 0:	1924.27
Pattern ffffffffffffffffoo flags 0:	2322.56
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2502.8
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3306.39
Pattern * flags 0:	208003


                       	wordexp
Pattern foo flags 0:	1097.7
Pattern ffoo flags 0:	655.891
Pattern ffffoo flags 0:	692.453
Pattern ffffffffoo flags 0:	860.203
Pattern ffffffffffffffffoo flags 0:	1009.38
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1426.02
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2179.64
Pattern * flags 0:	144492
Pattern foo flags 0:	1668.47
Pattern ffoo flags 0:	1684.38
Pattern ffffoo flags 0:	1718.75
Pattern ffffffffoo flags 0:	1932.91
Pattern ffffffffffffffffoo flags 0:	2294.95
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2514.89
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3329.14
Pattern * flags 0:	211152

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 8e615e5..fb737da 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -35,7 +35,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		strcat strchr strchrnul strcmp strcpy strcspn strlen \
 		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
-		strcoll
+		strcoll wordexp
 string-bench-all := $(string-bench)
 
 # We have to generate locales
diff --git a/benchtests/bench-wordexp.c b/benchtests/bench-wordexp.c
new file mode 100644
index 0000000..ffadbcc
--- /dev/null
+++ b/benchtests/bench-wordexp.c
@@ -0,0 +1,97 @@
+/* Measure wordexp functions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define TEST_MAIN
+#define TEST_NAME "wordexp"
+#include "bench-string.h"
+
+#include <wordexp.h>
+
+typedef int (*proto_t) (const char *, wordexp_t *, int);
+
+IMPL (wordexp, 1)
+
+
+static void
+do_one_test (impl_t *impl, const char *s1, int flag)
+{
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
+    {
+      wordexp_t out;
+      CALL (impl, s1, &out, flag);
+    }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
+}
+
+
+static void
+do_test (const char *s1, int flag)
+{
+
+  printf ("Pattern %s flags %i:", s1, flag);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, s1, flag);
+
+  putchar ('\n');
+}
+
+static int
+test_main (void)
+{
+  test_init ();
+
+  printf ("%23s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  do_test ("foo", 0);
+  do_test ("ffoo", 0);
+  do_test ("ffffoo", 0);
+  do_test ("ffffffffoo", 0);
+  do_test ("ffffffffffffffffoo", 0);
+  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
+do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
+
+
+  do_test ("*", 0);
+  setenv("IFS","                                                  \
+                                               ", 1);
+
+  do_test ("foo", 0);
+  do_test ("ffoo", 0);
+  do_test ("ffffoo", 0);
+  do_test ("ffffffffoo", 0);
+  do_test ("ffffffffffffffffoo", 0);
+  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
+do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
+  do_test ("*", 0);
+
+  return ret;
+}
+
+#include "../test-skeleton.c"

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PING] Wordexp benchtest: good, bad and unreliable.
  2015-06-06 12:42       ` Wordexp benchtest: good, bad and unreliable Ondřej Bílka
@ 2015-06-16 17:43         ` Ondřej Bílka
  2015-06-16 19:40           ` Carlos O'Donell
  0 siblings, 1 reply; 11+ messages in thread
From: Ondřej Bílka @ 2015-06-16 17:43 UTC (permalink / raw)
  To: Carlos O'Donell; +Cc: libc-alpha


So Carlos whats your comment on benchmark results?
On Sat, Jun 06, 2015 at 12:57:11PM +0200, Ondřej Bílka wrote:
> On Wed, May 13, 2015 at 02:55:28PM -0400, Carlos O'Donell wrote:
> > On 05/13/2015 02:35 PM, Ondřej Bílka wrote:
> > > On Wed, May 13, 2015 at 01:32:00PM -0400, Carlos O'Donell wrote:
> > >> On 05/13/2015 08:49 AM, Ondřej Bílka wrote:
> > >>> Hi, as I read Paul's wordexp patch I found that you use inefficient
> > >>> idiom. Checking character membership with strchr is slow due to startup
> > >>> cost. Much better is just use table lookup.
> > >>
> > >> How did you test it was faster?
> > >>
> > > You don't need to look at barometer to see if its raining. It does
> > > single memory access which takes around 5-6 cycles is faster than strchr that takes ~30 cycles 
> > > and cannot be faster as it also needs to do a memory access.
> > > 
> > >  
> > >> Could you please add a wordexp microbenchmark to show the gains?
> > >>
> > > I could. Its easy to make microbenchmark that shows improved performance.
> > > Also its easy to make microbenchmark that makes it a regression. Just
> > > use 128 byte IFS env. variable and call wordexp("x", foo, 0)
> > > I could also make microbenchmark that is inconclusive as performance
> > > difference is lost in syscall overhead of finding filenames by glob.
> > 
> > And those would all represent various workloads which we care about?
> > 
> > I'm fine having 3 different wordexp microbenchmarks.
> > 
> > > So I wont make microbenchmark as it would be useless exercise for pretty
> > > obvious case.
> > 
> > OK.
> >  
> > > You would need runtime profiling to get something useful.
> > 
> > I don't agree with that. I think a microbenchmark of wordexp would be useful
> > as a regression test for this case.
> > 
> > Cheers,
> > Carlos.
> >  
> 
> As I mentioned before for wordexp performance depends on too many
> parameters. So here is microbenchmark that shows some improvements and
> some regressions. There is lot of noise. I ran these three times and on
> one * and ffffffffoo pattern become twice slower for no reason.
> 
> So how do you decide with conflicted data like this?
> 
> 
> old implementation
>                        	wordexp
> Pattern foo flags 0:	1081.77
> Pattern ffoo flags 0:	621.516
> Pattern ffffoo flags 0:	697.562
> Pattern ffffffffoo flags 0:	867.922
> Pattern ffffffffffffffffoo flags 0:	1110.75
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1567.92
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2541.03
> Pattern * flags 0:	141070
> Pattern foo flags 0:	1061.08
> Pattern ffoo flags 0:	1190.48
> Pattern ffffoo flags 0:	1146.83
> Pattern ffffffffoo flags 0:	1407.77
> Pattern ffffffffffffffffoo flags 0:	1734.33
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2143.02
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3100.41
> Pattern * flags 0:	205510
> 
> 
>                        	wordexp
> Pattern foo flags 0:	1081.84
> Pattern ffoo flags 0:	672.422
> Pattern ffffoo flags 0:	704.828
> Pattern ffffffffoo flags 0:	875.188
> Pattern ffffffffffffffffoo flags 0:	1089.7
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1594.03
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2575.28
> Pattern * flags 0:	144160
> Pattern foo flags 0:	1062.45
> Pattern ffoo flags 0:	1099.72
> Pattern ffffoo flags 0:	1153.95
> Pattern ffffffffoo flags 0:	1391.45
> Pattern ffffffffffffffffoo flags 0:	1820.36
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2129
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3171.5
> Pattern * flags 0:	208475
> 
> 
>                        	wordexp
> Pattern foo flags 0:	1086.2
> Pattern ffoo flags 0:	615.078
> Pattern ffffoo flags 0:	688.594
> Pattern ffffffffoo flags 0:	851.984
> Pattern ffffffffffffffffoo flags 0:	1085.48
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1562.95
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2540.08
> Pattern * flags 0:	287636
> Pattern foo flags 0:	1068.11
> Pattern ffoo flags 0:	1092.5
> Pattern ffffoo flags 0:	1143.7
> Pattern ffffffffoo flags 0:	1380.66
> Pattern ffffffffffffffffoo flags 0:	1763.88
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2165.72
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3095.41
> Pattern * flags 0:	207373
> 
> new
> 
> 
>                        	wordexp
> Pattern foo flags 0:	1161.95
> Pattern ffoo flags 0:	631.016
> Pattern ffffoo flags 0:	714.797
> Pattern ffffffffoo flags 0:	2209.84
> Pattern ffffffffffffffffoo flags 0:	1024.17
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1408.22
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2223.39
> Pattern * flags 0:	146925
> Pattern foo flags 0:	1660.66
> Pattern ffoo flags 0:	1673.34
> Pattern ffffoo flags 0:	1730.23
> Pattern ffffffffoo flags 0:	2030.42
> Pattern ffffffffffffffffoo flags 0:	2245.59
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2512.95
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3312.95
> Pattern * flags 0:	210492
> 
> 
>                        	wordexp
> Pattern foo flags 0:	1184.19
> Pattern ffoo flags 0:	634.062
> Pattern ffffoo flags 0:	683.688
> Pattern ffffffffoo flags 0:	826.891
> Pattern ffffffffffffffffoo flags 0:	1020.27
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1405.89
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2185.44
> Pattern * flags 0:	140257
> Pattern foo flags 0:	1649.77
> Pattern ffoo flags 0:	1668.39
> Pattern ffffoo flags 0:	1722.23
> Pattern ffffffffoo flags 0:	1924.27
> Pattern ffffffffffffffffoo flags 0:	2322.56
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2502.8
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3306.39
> Pattern * flags 0:	208003
> 
> 
>                        	wordexp
> Pattern foo flags 0:	1097.7
> Pattern ffoo flags 0:	655.891
> Pattern ffffoo flags 0:	692.453
> Pattern ffffffffoo flags 0:	860.203
> Pattern ffffffffffffffffoo flags 0:	1009.38
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1426.02
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2179.64
> Pattern * flags 0:	144492
> Pattern foo flags 0:	1668.47
> Pattern ffoo flags 0:	1684.38
> Pattern ffffoo flags 0:	1718.75
> Pattern ffffffffoo flags 0:	1932.91
> Pattern ffffffffffffffffoo flags 0:	2294.95
> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2514.89
> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3329.14
> Pattern * flags 0:	211152
> 
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 8e615e5..fb737da 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -35,7 +35,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
>  		strcat strchr strchrnul strcmp strcpy strcspn strlen \
>  		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
>  		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
> -		strcoll
> +		strcoll wordexp
>  string-bench-all := $(string-bench)
>  
>  # We have to generate locales
> diff --git a/benchtests/bench-wordexp.c b/benchtests/bench-wordexp.c
> new file mode 100644
> index 0000000..ffadbcc
> --- /dev/null
> +++ b/benchtests/bench-wordexp.c
> @@ -0,0 +1,97 @@
> +/* Measure wordexp functions.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#define TEST_MAIN
> +#define TEST_NAME "wordexp"
> +#include "bench-string.h"
> +
> +#include <wordexp.h>
> +
> +typedef int (*proto_t) (const char *, wordexp_t *, int);
> +
> +IMPL (wordexp, 1)
> +
> +
> +static void
> +do_one_test (impl_t *impl, const char *s1, int flag)
> +{
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
> +    {
> +      wordexp_t out;
> +      CALL (impl, s1, &out, flag);
> +    }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
> +}
> +
> +
> +static void
> +do_test (const char *s1, int flag)
> +{
> +
> +  printf ("Pattern %s flags %i:", s1, flag);
> +
> +  FOR_EACH_IMPL (impl, 0)
> +    do_one_test (impl, s1, flag);
> +
> +  putchar ('\n');
> +}
> +
> +static int
> +test_main (void)
> +{
> +  test_init ();
> +
> +  printf ("%23s", "");
> +  FOR_EACH_IMPL (impl, 0)
> +    printf ("\t%s", impl->name);
> +  putchar ('\n');
> +
> +  do_test ("foo", 0);
> +  do_test ("ffoo", 0);
> +  do_test ("ffffoo", 0);
> +  do_test ("ffffffffoo", 0);
> +  do_test ("ffffffffffffffffoo", 0);
> +  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
> +do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
> +
> +
> +  do_test ("*", 0);
> +  setenv("IFS","                                                  \
> +                                               ", 1);
> +
> +  do_test ("foo", 0);
> +  do_test ("ffoo", 0);
> +  do_test ("ffffoo", 0);
> +  do_test ("ffffffffoo", 0);
> +  do_test ("ffffffffffffffffoo", 0);
> +  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
> +do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
> +  do_test ("*", 0);
> +
> +  return ret;
> +}
> +
> +#include "../test-skeleton.c"

-- 

Increased sunspot activity.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PING] Wordexp benchtest: good, bad and unreliable.
  2015-06-16 17:43         ` [PING] " Ondřej Bílka
@ 2015-06-16 19:40           ` Carlos O'Donell
  2015-06-18 17:39             ` Ondřej Bílka
  0 siblings, 1 reply; 11+ messages in thread
From: Carlos O'Donell @ 2015-06-16 19:40 UTC (permalink / raw)
  To: Ondřej Bílka; +Cc: libc-alpha

On 06/16/2015 01:32 PM, Ondřej Bílka wrote:
>> As I mentioned before for wordexp performance depends on too many
>> parameters. So here is microbenchmark that shows some improvements and
>> some regressions. There is lot of noise. I ran these three times and on
>> one * and ffffffffoo pattern become twice slower for no reason.

There is a reason, but we just don't know it yet.

>> So how do you decide with conflicted data like this?

You can't make an "automatic" or "automated" decision, but as a baseline
the statistics do help. With noisy benchmarks you can still look at
potential trends in the noise. We don't want the bad patterns to get 5x
slower.

Similarly we will have conflicting workloads modeled as benchmarks, and
the hard part is that as an expert we have to make a difficult choice.
We have to understand the workload and decide "Yes, we ignore this performance
decrease because we expect fewer people will suffer." However, in order
to do that we need comments about exactly what workload we're trying to model.

For most of our microbenchmarks the workloads are "raw performance of function
call" and that's very simplistic, it's also very generic.

>> old implementation
>>                        	wordexp
>> Pattern foo flags 0:	1081.77
>> Pattern ffoo flags 0:	621.516
>> Pattern ffffoo flags 0:	697.562
>> Pattern ffffffffoo flags 0:	867.922
>> Pattern ffffffffffffffffoo flags 0:	1110.75
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1567.92
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2541.03
>> Pattern * flags 0:	141070
>> Pattern foo flags 0:	1061.08
>> Pattern ffoo flags 0:	1190.48
>> Pattern ffffoo flags 0:	1146.83
>> Pattern ffffffffoo flags 0:	1407.77
>> Pattern ffffffffffffffffoo flags 0:	1734.33
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2143.02
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3100.41
>> Pattern * flags 0:	205510
>>
>>
>>                        	wordexp
>> Pattern foo flags 0:	1081.84
>> Pattern ffoo flags 0:	672.422
>> Pattern ffffoo flags 0:	704.828
>> Pattern ffffffffoo flags 0:	875.188
>> Pattern ffffffffffffffffoo flags 0:	1089.7
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1594.03
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2575.28
>> Pattern * flags 0:	144160
>> Pattern foo flags 0:	1062.45
>> Pattern ffoo flags 0:	1099.72
>> Pattern ffffoo flags 0:	1153.95
>> Pattern ffffffffoo flags 0:	1391.45
>> Pattern ffffffffffffffffoo flags 0:	1820.36
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2129
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3171.5
>> Pattern * flags 0:	208475
>>
>>
>>                        	wordexp
>> Pattern foo flags 0:	1086.2
>> Pattern ffoo flags 0:	615.078
>> Pattern ffffoo flags 0:	688.594
>> Pattern ffffffffoo flags 0:	851.984
>> Pattern ffffffffffffffffoo flags 0:	1085.48
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1562.95
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2540.08
>> Pattern * flags 0:	287636
>> Pattern foo flags 0:	1068.11
>> Pattern ffoo flags 0:	1092.5
>> Pattern ffffoo flags 0:	1143.7
>> Pattern ffffffffoo flags 0:	1380.66
>> Pattern ffffffffffffffffoo flags 0:	1763.88
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2165.72
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3095.41
>> Pattern * flags 0:	207373
>>
>> new
>>
>>
>>                        	wordexp
>> Pattern foo flags 0:	1161.95
>> Pattern ffoo flags 0:	631.016
>> Pattern ffffoo flags 0:	714.797
>> Pattern ffffffffoo flags 0:	2209.84
>> Pattern ffffffffffffffffoo flags 0:	1024.17
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1408.22
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2223.39
>> Pattern * flags 0:	146925
>> Pattern foo flags 0:	1660.66
>> Pattern ffoo flags 0:	1673.34
>> Pattern ffffoo flags 0:	1730.23
>> Pattern ffffffffoo flags 0:	2030.42
>> Pattern ffffffffffffffffoo flags 0:	2245.59
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2512.95
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3312.95
>> Pattern * flags 0:	210492
>>
>>
>>                        	wordexp
>> Pattern foo flags 0:	1184.19
>> Pattern ffoo flags 0:	634.062
>> Pattern ffffoo flags 0:	683.688
>> Pattern ffffffffoo flags 0:	826.891
>> Pattern ffffffffffffffffoo flags 0:	1020.27
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1405.89
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2185.44
>> Pattern * flags 0:	140257
>> Pattern foo flags 0:	1649.77
>> Pattern ffoo flags 0:	1668.39
>> Pattern ffffoo flags 0:	1722.23
>> Pattern ffffffffoo flags 0:	1924.27
>> Pattern ffffffffffffffffoo flags 0:	2322.56
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2502.8
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3306.39
>> Pattern * flags 0:	208003
>>
>>
>>                        	wordexp
>> Pattern foo flags 0:	1097.7
>> Pattern ffoo flags 0:	655.891
>> Pattern ffffoo flags 0:	692.453
>> Pattern ffffffffoo flags 0:	860.203
>> Pattern ffffffffffffffffoo flags 0:	1009.38
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1426.02
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2179.64
>> Pattern * flags 0:	144492
>> Pattern foo flags 0:	1668.47
>> Pattern ffoo flags 0:	1684.38
>> Pattern ffffoo flags 0:	1718.75
>> Pattern ffffffffoo flags 0:	1932.91
>> Pattern ffffffffffffffffoo flags 0:	2294.95
>> Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2514.89
>> Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3329.14
>> Pattern * flags 0:	211152
>>
>> diff --git a/benchtests/Makefile b/benchtests/Makefile
>> index 8e615e5..fb737da 100644
>> --- a/benchtests/Makefile
>> +++ b/benchtests/Makefile
>> @@ -35,7 +35,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
>>  		strcat strchr strchrnul strcmp strcpy strcspn strlen \
>>  		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
>>  		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
>> -		strcoll
>> +		strcoll wordexp
>>  string-bench-all := $(string-bench)
>>  
>>  # We have to generate locales
>> diff --git a/benchtests/bench-wordexp.c b/benchtests/bench-wordexp.c
>> new file mode 100644
>> index 0000000..ffadbcc
>> --- /dev/null
>> +++ b/benchtests/bench-wordexp.c
>> @@ -0,0 +1,97 @@
>> +/* Measure wordexp functions.
>> +   Copyright (C) 2015 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +#define TEST_MAIN
>> +#define TEST_NAME "wordexp"
>> +#include "bench-string.h"
>> +
>> +#include <wordexp.h>
>> +
>> +typedef int (*proto_t) (const char *, wordexp_t *, int);
>> +
>> +IMPL (wordexp, 1)
>> +
>> +
>> +static void
>> +do_one_test (impl_t *impl, const char *s1, int flag)
>> +{
>> +  size_t i, iters = INNER_LOOP_ITERS;
>> +  timing_t start, stop, cur;
>> +
>> +  TIMING_NOW (start);
>> +  for (i = 0; i < iters; ++i)
>> +    {
>> +      wordexp_t out;
>> +      CALL (impl, s1, &out, flag);
>> +    }
>> +  TIMING_NOW (stop);
>> +
>> +  TIMING_DIFF (cur, start, stop);
>> +
>> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>> +}
>> +
>> +
>> +static void
>> +do_test (const char *s1, int flag)
>> +{
>> +
>> +  printf ("Pattern %s flags %i:", s1, flag);
>> +
>> +  FOR_EACH_IMPL (impl, 0)
>> +    do_one_test (impl, s1, flag);
>> +
>> +  putchar ('\n');
>> +}
>> +
>> +static int
>> +test_main (void)
>> +{
>> +  test_init ();
>> +
>> +  printf ("%23s", "");
>> +  FOR_EACH_IMPL (impl, 0)
>> +    printf ("\t%s", impl->name);
>> +  putchar ('\n');
>> +
>> +  do_test ("foo", 0);
>> +  do_test ("ffoo", 0);
>> +  do_test ("ffffoo", 0);
>> +  do_test ("ffffffffoo", 0);
>> +  do_test ("ffffffffffffffffoo", 0);
>> +  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
>> +do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);

This group of tests should have a comment about what's it's testing.

What workload are we modeling?

>> +
>> +
>> +  do_test ("*", 0);
>> +  setenv("IFS","                                                  \
>> +                                               ", 1);
>> +
>> +  do_test ("foo", 0);
>> +  do_test ("ffoo", 0);
>> +  do_test ("ffffoo", 0);
>> +  do_test ("ffffffffoo", 0);
>> +  do_test ("ffffffffffffffffoo", 0);
>> +  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
>> +do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
>> +  do_test ("*", 0);
>> +

Similarly with the IFS change. This is not a normal IFS change for an average workload.

Usually one changes IFS to be a series of characters, or just tab, or just newline etc.

>> +  return ret;
>> +}
>> +
>> +#include "../test-skeleton.c"
> 

The benchmark looks OK to me modulo the added comments.

Cheers,
Carlos.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PING] Wordexp benchtest: good, bad and unreliable.
  2015-06-16 19:40           ` Carlos O'Donell
@ 2015-06-18 17:39             ` Ondřej Bílka
  2015-06-18 17:53               ` Carlos O'Donell
  0 siblings, 1 reply; 11+ messages in thread
From: Ondřej Bílka @ 2015-06-18 17:39 UTC (permalink / raw)
  To: Carlos O'Donell; +Cc: libc-alpha

On Tue, Jun 16, 2015 at 03:24:03PM -0400, Carlos O'Donell wrote:
> On 06/16/2015 01:32 PM, Ondřej Bílka wrote:
> >> As I mentioned before for wordexp performance depends on too many
> >> parameters. So here is microbenchmark that shows some improvements and
> >> some regressions. There is lot of noise. I ran these three times and on
> >> one * and ffffffffoo pattern become twice slower for no reason.
> 
> There is a reason, but we just don't know it yet.
>
My guess that difference was caused by stat variance but I couldn't
verify that. 

> >> So how do you decide with conflicted data like this?
> 
> You can't make an "automatic" or "automated" decision, but as a baseline
> the statistics do help. With noisy benchmarks you can still look at
> potential trends in the noise. We don't want the bad patterns to get 5x
> slower.
> 

Thats for most of functions simply unavoidable, what I submitted is classical example that you cannot avoid some paterns being 5 times slower as you are shifting worst case.

Here with my wordexp change patterns that didn't used strchr(ifs,x)
would get five times slower when user uses big IFS.

On other hand patterns that checked strchr(ifs,x) will become five times
faster with this change.

These tradeoffs are actually quite routine, for example most of string
routines tend to be slow when inputs are always at page boundary. You
cannot avoid that as alternatives move worst case to much more probable
scenarios like empirical probability ~1/8 that 16 bytes cross 64 byte boundary.

I could improve performance by optimizing that case aggresively. However
it would likely be net loss as cross-page is most of time cold path and
I try to optimize it to size.

> Similarly we will have conflicting workloads modeled as benchmarks, and
> the hard part is that as an expert we have to make a difficult choice.
> We have to understand the workload and decide "Yes, we ignore this performance
> decrease because we expect fewer people will suffer." However, in order
> to do that we need comments about exactly what workload we're trying to model.
>
Thats problem that you need to collect lot of domain knowledge to be
usable. Problem is that I didn't find yet application that uses it.
 
> For most of our microbenchmarks the workloads are "raw performance of function
> call" and that's very simplistic, it's also very generic.
> 
Thats quite problem when there are many variants that need to be tried
what should you priorize, as this migth be improved or not depending on
average ifs size and average size of pattern.

> This group of tests should have a comment about what's it's testing.
> 
> What workload are we modeling?
>
None in particular, just trying to measure performance change.

> >> +
> >> +
> >> +  do_test ("*", 0);
> >> +  setenv("IFS","                                                  \
> >> +                                               ", 1);
> >> +
> >> +  do_test ("foo", 0);
> >> +  do_test ("ffoo", 0);
> >> +  do_test ("ffffoo", 0);
> >> +  do_test ("ffffffffoo", 0);
> >> +  do_test ("ffffffffffffffffoo", 0);
> >> +  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
> >> +do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
> >> +  do_test ("*", 0);
> >> +
> 
> Similarly with the IFS change. This is not a normal IFS change for an average workload.
> 
> Usually one changes IFS to be a series of characters, or just tab, or just newline etc.
> 
These are worst case for my change. There is constant overhead per call
to parse IFS, then my patch saves some cycles per character depending on
which path we take.

On paths of f...ffoo form you save cost of conversion
 strchr ("\n|&;<>(){}", ch)
per character.

I read code more and I could also measure saving of
strchr(ifs, ch) 
per character for patterns ?fff...fffo.

However measuring exact performance is bit difficult again due to noise
as wordexp calls glob which takes ~70000 cycles. Thats lot compared with
savings in wordexp itself, difference between longest pattern was 4500
which this change reduced to 500 cycles. I am not completely sure about
that numbers as I got that estimate by subtracting performance of glob
without ifs.

old one:

                       	wordexp
Pattern foo flags 0:	1131.86
Pattern ffoo flags 0:	629.5
Pattern ffffoo flags 0:	709.562
Pattern ffffffffoo flags 0:	866.359
Pattern ffffffffffffffffoo flags 0:	1115.53
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1591.81
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2608.45
Pattern ?foo flags 0:	74733.9
Pattern ?ffoo flags 0:	71044.9
Pattern ?ffffoo flags 0:	71165.1
Pattern ?ffffffffoo flags 0:	71009
Pattern ?ffffffffffffffffoo flags 0:	71283.4
Pattern ?ffffffffffffffffffffffffffffffffoo flags 0:	71748.2
Pattern ?ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	73446.5
Pattern * flags 0:	135284
Hundred byte IFS
Pattern foo flags 0:	1067.41
Pattern ffoo flags 0:	1103.17
Pattern ffffoo flags 0:	1164.02
Pattern ffffffffoo flags 0:	1449.36
Pattern ffffffffffffffffoo flags 0:	2375.23
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2170.95
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3120.31
Pattern ?foo flags 0:	72397
Pattern ?ffoo flags 0:	71573.4
Pattern ?ffffoo flags 0:	71768.1
Pattern ?ffffffffoo flags 0:	71925.9
Pattern ?ffffffffffffffffoo flags 0:	72591.7
Pattern ?ffffffffffffffffffffffffffffffffoo flags 0:	73172.4
Pattern ?ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	78188.5
Pattern * flags 0:	134651


new one:

                       	wordexp
Pattern foo flags 0:	1056.98
Pattern ffoo flags 0:	680.562
Pattern ffffoo flags 0:	690.219
Pattern ffffffffoo flags 0:	842.516
Pattern ffffffffffffffffoo flags 0:	1016.67
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	1411.91
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	2174.77
Pattern ?foo flags 0:	81948.1
Pattern ?ffoo flags 0:	71048.2
Pattern ?ffffoo flags 0:	71345
Pattern ?ffffffffoo flags 0:	71098.2
Pattern ?ffffffffffffffffoo flags 0:	71314.9
Pattern ?ffffffffffffffffffffffffffffffffoo flags 0:	71669
Pattern ?ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	72204.1
Pattern * flags 0:	137048
Hundred byte IFS
Pattern foo flags 0:	1568.56
Pattern ffoo flags 0:	1581.34
Pattern ffffoo flags 0:	1627.89
Pattern ffffffffoo flags 0:	1924.27
Pattern ffffffffffffffffoo flags 0:	2109.28
Pattern ffffffffffffffffffffffffffffffffoo flags 0:	2456.67
Pattern ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	3204.42
Pattern ?foo flags 0:	74253.8
Pattern ?ffoo flags 0:	72561.5
Pattern ?ffffoo flags 0:	72630.3
Pattern ?ffffffffoo flags 0:	72794.3
Pattern ?ffffffffffffffffoo flags 0:	72951
Pattern ?ffffffffffffffffffffffffffffffffoo flags 0:	73194.2
Pattern ?ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo flags 0:	74068.2
Pattern * flags 0:	137750


diff --git a/benchtests/Makefile b/benchtests/Makefile
index 8e615e5..fb737da 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -35,7 +35,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		strcat strchr strchrnul strcmp strcpy strcspn strlen \
 		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
-		strcoll
+		strcoll wordexp
 string-bench-all := $(string-bench)
 
 # We have to generate locales
diff --git a/benchtests/bench-wordexp.c b/benchtests/bench-wordexp.c
new file mode 100644
index 0000000..7696f73
--- /dev/null
+++ b/benchtests/bench-wordexp.c
@@ -0,0 +1,118 @@
+/* Measure wordexp functions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define TEST_MAIN
+#define TEST_NAME "wordexp"
+#include "bench-string.h"
+
+#include <wordexp.h>
+
+typedef int (*proto_t) (const char *, wordexp_t *, int);
+
+IMPL (wordexp, 1)
+
+
+static void
+do_one_test (impl_t *impl, const char *s1, int flag)
+{
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
+    {
+      wordexp_t out;
+      CALL (impl, s1, &out, flag);
+    }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
+}
+
+
+static void
+do_test (const char *s1, int flag)
+{
+
+  printf ("Pattern %s flags %i:", s1, flag);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, s1, flag);
+
+  putchar ('\n');
+}
+
+static int
+test_main (void)
+{
+  test_init ();
+
+  printf ("%23s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  do_test ("foo", 0);
+  do_test ("ffoo", 0);
+  do_test ("ffffoo", 0);
+  do_test ("ffffffffoo", 0);
+  do_test ("ffffffffffffffffoo", 0);
+  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
+do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
+
+  do_test ("?foo", 0);
+  do_test ("?ffoo", 0);
+  do_test ("?ffffoo", 0);
+  do_test ("?ffffffffoo", 0);
+  do_test ("?ffffffffffffffffoo", 0);
+  do_test ("?ffffffffffffffffffffffffffffffffoo", 0);
+do_test ("?ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
+
+
+  do_test ("*", 0);
+
+  printf ("Hundred byte IFS\n");
+
+
+  setenv("IFS","                                                  \
+                                               ", 1);
+
+  do_test ("foo", 0);
+  do_test ("ffoo", 0);
+  do_test ("ffffoo", 0);
+  do_test ("ffffffffoo", 0);
+  do_test ("ffffffffffffffffoo", 0);
+  do_test ("ffffffffffffffffffffffffffffffffoo", 0);
+do_test ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
+
+  do_test ("?foo", 0);
+  do_test ("?ffoo", 0);
+  do_test ("?ffffoo", 0);
+  do_test ("?ffffffffoo", 0);
+  do_test ("?ffffffffffffffffoo", 0);
+  do_test ("?ffffffffffffffffffffffffffffffffoo", 0);
+do_test ("?ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffoo", 0);
+
+  do_test ("*", 0);
+
+  return ret;
+}
+
+#include "../test-skeleton.c"

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PING] Wordexp benchtest: good, bad and unreliable.
  2015-06-18 17:39             ` Ondřej Bílka
@ 2015-06-18 17:53               ` Carlos O'Donell
  0 siblings, 0 replies; 11+ messages in thread
From: Carlos O'Donell @ 2015-06-18 17:53 UTC (permalink / raw)
  To: Ondřej Bílka; +Cc: libc-alpha

On 06/18/2015 12:34 PM, Ondřej Bílka wrote:
>> Similarly we will have conflicting workloads modeled as benchmarks, and
>> the hard part is that as an expert we have to make a difficult choice.
>> We have to understand the workload and decide "Yes, we ignore this performance
>> decrease because we expect fewer people will suffer." However, in order
>> to do that we need comments about exactly what workload we're trying to model.
>>
> Thats problem that you need to collect lot of domain knowledge to be
> usable. Problem is that I didn't find yet application that uses it.

mailx uses wordexp to expand folder paths.

>>
> None in particular, just trying to measure performance change.

What about looking at feeding paths into wordexp and using it like mailx
does to do light-weight globbing?

See mailx-12.5/fio.c, where globname uses wordexp to do the expansions,
and also expand as used in the rest of mailx.

Cheers,
Carlos.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-06-18 17:39 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-13 12:50 [RFC] Improve wordexp performance Ondřej Bílka
2015-05-13 15:59 ` Joseph Myers
2015-05-13 17:27   ` [RFC v2] " Ondřej Bílka
2015-05-13 17:32 ` [RFC] " Carlos O'Donell
2015-05-13 18:36   ` Ondřej Bílka
2015-05-13 19:54     ` Carlos O'Donell
2015-06-06 12:42       ` Wordexp benchtest: good, bad and unreliable Ondřej Bílka
2015-06-16 17:43         ` [PING] " Ondřej Bílka
2015-06-16 19:40           ` Carlos O'Donell
2015-06-18 17:39             ` Ondřej Bílka
2015-06-18 17:53               ` Carlos O'Donell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).