public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [RFA] C++-ify parse_format_string
@ 2017-11-23 16:46 Tom Tromey
  2017-11-23 21:14 ` Simon Marchi
  0 siblings, 1 reply; 11+ messages in thread
From: Tom Tromey @ 2017-11-23 16:46 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey

This replaces parse_format_string with a class, removing some
constructors along the way.  While doing this, I found that one
argument to gen_printf is unused, so I removed it.

Also, I am not completely sure, but the use of `release' in
maint_agent_printf_command and parse_cmd_to_aexpr seems like it may
leak expressions.

Regression tested by the buildbot.

ChangeLog
2017-11-23  Tom Tromey  <tom@tromey.com>

	* printcmd.c (ui_printf): Update.  Use std::vector.
	* common/format.h (struct format_piece): Add constructor.
	<string>: Now const.
	(class format_pieces): New class.
	(parse_format_string, free_format_pieces)
	(free_format_pieces_cleanup): Remove.
	* common/format.c (format_pieces::format_pieces): Rename from
	parse_format_string.  Update.
	(free_format_pieces, free_format_pieces_cleanup): Remove.
	* breakpoint.c (parse_cmd_to_aexpr): Update.  Use std::vector.
	* ax-gdb.h (gen_printf): Remove argument.
	* ax-gdb.c (gen_printf): Remove "frags" argument.
	(maint_agent_printf_command): Update.  Use std::vector.

gdbserver/ChangeLog
2017-11-23  Tom Tromey  <tom@tromey.com>

	* ax.c (ax_printf): Update.
---
 gdb/ChangeLog           | 16 +++++++++++++
 gdb/ax-gdb.c            | 17 ++++----------
 gdb/ax-gdb.h            |  2 --
 gdb/breakpoint.c        | 19 ++++-----------
 gdb/common/format.c     | 62 ++++---------------------------------------------
 gdb/common/format.h     | 42 ++++++++++++++++++++++++++-------
 gdb/gdbserver/ChangeLog |  4 ++++
 gdb/gdbserver/ax.c      | 22 ++++++++----------
 gdb/printcmd.c          | 41 +++++++++++---------------------
 9 files changed, 89 insertions(+), 136 deletions(-)

diff --git a/gdb/ax-gdb.c b/gdb/ax-gdb.c
index 52ca081a82..e22e0e6582 100644
--- a/gdb/ax-gdb.c
+++ b/gdb/ax-gdb.c
@@ -2541,7 +2541,6 @@ agent_expr_up
 gen_printf (CORE_ADDR scope, struct gdbarch *gdbarch,
 	    CORE_ADDR function, LONGEST channel,
 	    const char *format, int fmtlen,
-	    struct format_piece *frags,
 	    int nargs, struct expression **exprs)
 {
   agent_expr_up ax (new agent_expr (gdbarch, scope));
@@ -2680,12 +2679,8 @@ agent_eval_command (const char *exp, int from_tty)
 static void
 maint_agent_printf_command (const char *cmdrest, int from_tty)
 {
-  struct cleanup *old_chain = 0;
-  struct expression *argvec[100];
   struct frame_info *fi = get_current_frame ();	/* need current scope */
   const char *format_start, *format_end;
-  struct format_piece *fpieces;
-  int nargs;
 
   /* We don't deal with overlay debugging at the moment.  We need to
      think more carefully about this.  If you copy this code into
@@ -2704,9 +2699,7 @@ maint_agent_printf_command (const char *cmdrest, int from_tty)
 
   format_start = cmdrest;
 
-  fpieces = parse_format_string (&cmdrest);
-
-  old_chain = make_cleanup (free_format_pieces_cleanup, &fpieces);
+  format_pieces fpieces (&cmdrest);
 
   format_end = cmdrest;
 
@@ -2722,15 +2715,14 @@ maint_agent_printf_command (const char *cmdrest, int from_tty)
     cmdrest++;
   cmdrest = skip_spaces (cmdrest);
 
-  nargs = 0;
+  std::vector<struct expression *> argvec;
   while (*cmdrest != '\0')
     {
       const char *cmd1;
 
       cmd1 = cmdrest;
       expression_up expr = parse_exp_1 (&cmd1, 0, (struct block *) 0, 1);
-      argvec[nargs] = expr.release ();
-      ++nargs;
+      argvec.push_back (expr.release ());
       cmdrest = cmd1;
       if (*cmdrest == ',')
 	++cmdrest;
@@ -2741,14 +2733,13 @@ maint_agent_printf_command (const char *cmdrest, int from_tty)
   agent_expr_up agent = gen_printf (get_frame_pc (fi), get_current_arch (),
 				    0, 0,
 				    format_start, format_end - format_start,
-				    fpieces, nargs, argvec);
+				    argvec.size (), argvec.data ());
   ax_reqs (agent.get ());
   ax_print (gdb_stdout, agent.get ());
 
   /* It would be nice to call ax_reqs here to gather some general info
      about the expression, and then print out the result.  */
 
-  do_cleanups (old_chain);
   dont_repeat ();
 }
 
diff --git a/gdb/ax-gdb.h b/gdb/ax-gdb.h
index 8b5ab46c66..834ddffe05 100644
--- a/gdb/ax-gdb.h
+++ b/gdb/ax-gdb.h
@@ -120,10 +120,8 @@ extern void gen_expr (struct expression *exp, union exp_element **pc,
 
 extern void require_rvalue (struct agent_expr *ax, struct axs_value *value);
 
-struct format_piece;
 extern agent_expr_up gen_printf (CORE_ADDR, struct gdbarch *,
 				 CORE_ADDR, LONGEST, const char *, int,
-				 struct format_piece *,
 				 int, struct expression **);
 
 #endif /* AX_GDB_H */
diff --git a/gdb/breakpoint.c b/gdb/breakpoint.c
index b48c405b08..41c542e08b 100644
--- a/gdb/breakpoint.c
+++ b/gdb/breakpoint.c
@@ -2230,12 +2230,8 @@ build_target_condition_list (struct bp_location *bl)
 static agent_expr_up
 parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
 {
-  struct cleanup *old_cleanups = 0;
-  struct expression **argvec;
   const char *cmdrest;
   const char *format_start, *format_end;
-  struct format_piece *fpieces;
-  int nargs;
   struct gdbarch *gdbarch = get_current_arch ();
 
   if (cmd == NULL)
@@ -2252,9 +2248,7 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
 
   format_start = cmdrest;
 
-  fpieces = parse_format_string (&cmdrest);
-
-  old_cleanups = make_cleanup (free_format_pieces_cleanup, &fpieces);
+  format_pieces fpieces (&cmdrest);
 
   format_end = cmdrest;
 
@@ -2272,17 +2266,14 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
 
   /* For each argument, make an expression.  */
 
-  argvec = (struct expression **) alloca (strlen (cmd)
-					 * sizeof (struct expression *));
-
-  nargs = 0;
+  std::vector<struct expression *> argvec;
   while (*cmdrest != '\0')
     {
       const char *cmd1;
 
       cmd1 = cmdrest;
       expression_up expr = parse_exp_1 (&cmd1, scope, block_for_pc (scope), 1);
-      argvec[nargs++] = expr.release ();
+      argvec.push_back (expr.release ());
       cmdrest = cmd1;
       if (*cmdrest == ',')
 	++cmdrest;
@@ -2296,7 +2287,7 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
     {
       aexpr = gen_printf (scope, gdbarch, 0, 0,
 			  format_start, format_end - format_start,
-			  fpieces, nargs, argvec);
+			  argvec.size (), argvec.data ());
     }
   CATCH (ex, RETURN_MASK_ERROR)
     {
@@ -2306,8 +2297,6 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
     }
   END_CATCH
 
-  do_cleanups (old_cleanups);
-
   /* We have a valid agent expression, return it.  */
   return aexpr;
 }
diff --git a/gdb/common/format.c b/gdb/common/format.c
index 8cb15511fa..95cb8053c4 100644
--- a/gdb/common/format.c
+++ b/gdb/common/format.c
@@ -20,17 +20,13 @@
 #include "common-defs.h"
 #include "format.h"
 
-struct format_piece *
-parse_format_string (const char **arg)
+format_pieces::format_pieces (const char **arg)
 {
   const char *s;
   char *f, *string;
   const char *prev_start;
   const char *percent_loc;
   char *sub_start, *current_substring;
-  struct format_piece *pieces;
-  int next_frag;
-  int max_pieces;
   enum argclass this_argclass;
 
   s = *arg;
@@ -100,12 +96,7 @@ parse_format_string (const char **arg)
   /* Need extra space for the '\0's.  Doubling the size is sufficient.  */
 
   current_substring = (char *) xmalloc (strlen (string) * 2 + 1000);
-
-  max_pieces = strlen (string) + 2;
-
-  pieces = XNEWVEC (struct format_piece, max_pieces);
-
-  next_frag = 0;
+  m_storage.reset (current_substring);
 
   /* Now scan the string for %-specs and see what kinds of args they want.
      argclass classifies the %-specs so we can give printf-type functions
@@ -135,9 +126,7 @@ parse_format_string (const char **arg)
 	current_substring += f - 1 - prev_start;
 	*current_substring++ = '\0';
 
-	pieces[next_frag].string = sub_start;
-	pieces[next_frag].argclass = literal_piece;
-	next_frag++;
+	m_pieces.emplace_back (sub_start, literal_piece);
 
 	percent_loc = f - 1;
 
@@ -343,9 +332,7 @@ parse_format_string (const char **arg)
 
 	prev_start = f;
 
-	pieces[next_frag].string = sub_start;
-	pieces[next_frag].argclass = this_argclass;
-	next_frag++;
+	m_pieces.emplace_back (sub_start, this_argclass);
       }
 
   /* Record the remainder of the string.  */
@@ -356,44 +343,5 @@ parse_format_string (const char **arg)
   current_substring += f - prev_start;
   *current_substring++ = '\0';
 
-  pieces[next_frag].string = sub_start;
-  pieces[next_frag].argclass = literal_piece;
-  next_frag++;
-
-  /* Record an end-of-array marker.  */
-
-  pieces[next_frag].string = NULL;
-  pieces[next_frag].argclass = literal_piece;
-
-  return pieces;
+  m_pieces.emplace_back (sub_start, literal_piece);
 }
-
-void
-free_format_pieces (struct format_piece *pieces)
-{
-  if (!pieces)
-    return;
-
-  /* We happen to know that all the string pieces are in the block
-     pointed to by the first string piece.  */
-  if (pieces[0].string)
-    xfree (pieces[0].string);
-
-  xfree (pieces);
-}
-
-void
-free_format_pieces_cleanup (void *ptr)
-{
-  struct format_piece **location = (struct format_piece **) ptr;
-
-  if (location == NULL)
-    return;
-
-  if (*location != NULL)
-    {
-      free_format_pieces (*location);
-      *location = NULL;
-    }
-}
-
diff --git a/gdb/common/format.h b/gdb/common/format.h
index f3a94b8bbb..dd083f9ac1 100644
--- a/gdb/common/format.h
+++ b/gdb/common/format.h
@@ -48,22 +48,46 @@ enum argclass
 
 struct format_piece
 {
-  char *string;
+  format_piece (const char *str, enum argclass argc)
+    : string (str),
+      argclass (argc)
+  {
+  }
+
+  const char *string;
   enum argclass argclass;
 };
 
-/* Return an array of printf fragments found at the given string, and
-   rewrite ARG with a pointer to the end of the format string.  */
+class format_pieces
+{
+public:
+
+  format_pieces (const char **arg);
+  ~format_pieces () = default;
+
+  DISABLE_COPY_AND_ASSIGN (format_pieces);
 
-extern struct format_piece *parse_format_string (const char **arg);
+  format_piece &operator[] (size_t index)
+  {
+    return m_pieces[index];
+  }
 
-/* Given a pointer to an array of format pieces, free any memory that
-   would have been allocated by parse_format_string.  */
+  typedef std::vector<format_piece>::iterator iterator;
 
-extern void free_format_pieces (struct format_piece *frags);
+  iterator begin ()
+  {
+    return m_pieces.begin ();
+  }
 
-/* Freeing, cast as a cleanup.  */
+  iterator end ()
+  {
+    return m_pieces.end ();
+  }
 
-extern void free_format_pieces_cleanup (void *);
+private:
+
+  std::vector<format_piece> m_pieces;
+  gdb::unique_xmalloc_ptr<char> m_storage;
+};
 
 #endif /* COMMON_FORMAT_H */
diff --git a/gdb/gdbserver/ax.c b/gdb/gdbserver/ax.c
index 35ed2c69ad..7e5a409cfd 100644
--- a/gdb/gdbserver/ax.c
+++ b/gdb/gdbserver/ax.c
@@ -816,30 +816,29 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 	   int nargs, ULONGEST *args)
 {
   const char *f = format;
-  struct format_piece *fpieces;
-  int i, fp;
-  char *current_substring;
+  int i;
+  const char *current_substring;
   int nargs_wanted;
 
   ax_debug ("Printf of \"%s\" with %d args", format, nargs);
 
-  fpieces = parse_format_string (&f);
+  format_pieces fpieces (&f);
 
   nargs_wanted = 0;
-  for (fp = 0; fpieces[fp].string != NULL; fp++)
-    if (fpieces[fp].argclass != literal_piece)
+  for (auto &&piece : fpieces)
+    if (piece.argclass != literal_piece)
       ++nargs_wanted;
 
   if (nargs != nargs_wanted)
     error (_("Wrong number of arguments for specified format-string"));
 
   i = 0;
-  for (fp = 0; fpieces[fp].string != NULL; fp++)
+  for (auto &&piece : fpieces)
     {
-      current_substring = fpieces[fp].string;
+      current_substring = piece.string;
       ax_debug ("current substring is '%s', class is %d",
-		current_substring, fpieces[fp].argclass);
-      switch (fpieces[fp].argclass)
+		current_substring, piece.argclass);
+      switch (piece.argclass)
 	{
 	case string_arg:
 	  {
@@ -914,11 +913,10 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 	}
 
       /* Maybe advance to the next argument.  */
-      if (fpieces[fp].argclass != literal_piece)
+      if (piece.argclass != literal_piece)
 	++i;
     }
 
-  free_format_pieces (fpieces);
   fflush (stdout);
 }
 
diff --git a/gdb/printcmd.c b/gdb/printcmd.c
index 2e596d1f09..7ca86232a1 100644
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@@ -2427,14 +2427,8 @@ printf_pointer (struct ui_file *stream, const char *format,
 static void
 ui_printf (const char *arg, struct ui_file *stream)
 {
-  struct format_piece *fpieces;
   const char *s = arg;
-  struct value **val_args;
-  int allocated_args = 20;
-  struct cleanup *old_cleanups;
-
-  val_args = XNEWVEC (struct value *, allocated_args);
-  old_cleanups = make_cleanup (free_current_contents, &val_args);
+  std::vector<struct value *> val_args;
 
   if (s == 0)
     error_no_arg (_("format-control string and values to print"));
@@ -2445,9 +2439,7 @@ ui_printf (const char *arg, struct ui_file *stream)
   if (*s++ != '"')
     error (_("Bad format string, missing '\"'."));
 
-  fpieces = parse_format_string (&s);
-
-  make_cleanup (free_format_pieces_cleanup, &fpieces);
+  format_pieces fpieces (&s);
 
   if (*s++ != '"')
     error (_("Bad format string, non-terminated '\"'."));
@@ -2462,14 +2454,13 @@ ui_printf (const char *arg, struct ui_file *stream)
   s = skip_spaces (s);
 
   {
-    int nargs = 0;
     int nargs_wanted;
-    int i, fr;
-    char *current_substring;
+    int i;
+    const char *current_substring;
 
     nargs_wanted = 0;
-    for (fr = 0; fpieces[fr].string != NULL; fr++)
-      if (fpieces[fr].argclass != literal_piece)
+    for (auto &&piece : fpieces)
+      if (piece.argclass != literal_piece)
 	++nargs_wanted;
 
     /* Now, parse all arguments and evaluate them.
@@ -2479,28 +2470,23 @@ ui_printf (const char *arg, struct ui_file *stream)
       {
 	const char *s1;
 
-	if (nargs == allocated_args)
-	  val_args = (struct value **) xrealloc ((char *) val_args,
-						 (allocated_args *= 2)
-						 * sizeof (struct value *));
 	s1 = s;
-	val_args[nargs] = parse_to_comma_and_eval (&s1);
+	val_args.push_back (parse_to_comma_and_eval (&s1));
 
-	nargs++;
 	s = s1;
 	if (*s == ',')
 	  s++;
       }
 
-    if (nargs != nargs_wanted)
+    if (val_args.size () != nargs_wanted)
       error (_("Wrong number of arguments for specified format-string"));
 
     /* Now actually print them.  */
     i = 0;
-    for (fr = 0; fpieces[fr].string != NULL; fr++)
+    for (auto &&piece : fpieces)
       {
-	current_substring = fpieces[fr].string;
-	switch (fpieces[fr].argclass)
+	current_substring = piece.string;
+	switch (piece.argclass)
 	  {
 	  case string_arg:
 	    printf_c_string (stream, current_substring, val_args[i]);
@@ -2569,7 +2555,7 @@ ui_printf (const char *arg, struct ui_file *stream)
 	  case dec64float_arg:
 	  case dec128float_arg:
 	    printf_floating (stream, current_substring, val_args[i],
-			     fpieces[fr].argclass);
+			     piece.argclass);
 	    break;
 	  case ptr_arg:
 	    printf_pointer (stream, current_substring, val_args[i]);
@@ -2590,11 +2576,10 @@ ui_printf (const char *arg, struct ui_file *stream)
 			    _("failed internal consistency check"));
 	  }
 	/* Maybe advance to the next argument.  */
-	if (fpieces[fr].argclass != literal_piece)
+	if (piece.argclass != literal_piece)
 	  ++i;
       }
   }
-  do_cleanups (old_cleanups);
 }
 
 /* Implement the "printf" command.  */
-- 
2.13.6

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFA] C++-ify parse_format_string
  2017-11-23 16:46 [RFA] C++-ify parse_format_string Tom Tromey
@ 2017-11-23 21:14 ` Simon Marchi
  2017-11-23 22:40   ` Pedro Alves
  0 siblings, 1 reply; 11+ messages in thread
From: Simon Marchi @ 2017-11-23 21:14 UTC (permalink / raw)
  To: Tom Tromey, gdb-patches

On 2017-11-23 11:46 AM, Tom Tromey wrote:
> This replaces parse_format_string with a class, removing some
> constructors along the way.  While doing this, I found that one
> argument to gen_printf is unused, so I removed it.
> 
> Also, I am not completely sure, but the use of `release' in
> maint_agent_printf_command and parse_cmd_to_aexpr seems like it may
> leak expressions.

It looks fishy indeed.  You could change argvec to be a vector of
expression_up.

> Regression tested by the buildbot.

I have a patch in some branch that does essentially the same thing, so
I was able to compare our approaches.  In my version, I removed the
big allocation that is shared among pieces, and made each piece have
its own std::string.  Unless we want to keep the current allocation
scheme for performance/memory usage reasons, I think that using
std::strings simplifies things in the parse_format_string function.
The format_pieces structure is replaced with an std::vector of
format_piece.

I rebased it and stole some parts from your patch for other little cleanups
(e.g. remove unused argument, use vectors of expressions).  Here it is,
the 3rd from the top:

  https://github.com/simark/binutils-gdb/commits/vec-format_piece

Let me know what you think about the approach, if you think it's good I'll
get it in a mergeable state.

Simon

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFA] C++-ify parse_format_string
  2017-11-23 21:14 ` Simon Marchi
@ 2017-11-23 22:40   ` Pedro Alves
  2017-11-24  3:17     ` Simon Marchi
  0 siblings, 1 reply; 11+ messages in thread
From: Pedro Alves @ 2017-11-23 22:40 UTC (permalink / raw)
  To: Simon Marchi, Tom Tromey, gdb-patches

On 11/23/2017 09:13 PM, Simon Marchi wrote:

> I have a patch in some branch that does essentially the same thing, so
> I was able to compare our approaches.  In my version, I removed the
> big allocation that is shared among pieces, and made each piece have
> its own std::string.  Unless we want to keep the current allocation
> scheme for performance/memory usage reasons, I think that using
> std::strings simplifies things in the parse_format_string function.
> The format_pieces structure is replaced with an std::vector of
> format_piece.

Sounds like a step backwards to me.  If it simplifies things, then
it sounds like it might be because we're missing some utility,
like string_view or something like that.

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFA] C++-ify parse_format_string
  2017-11-23 22:40   ` Pedro Alves
@ 2017-11-24  3:17     ` Simon Marchi
  2017-11-24 12:54       ` Pedro Alves
  0 siblings, 1 reply; 11+ messages in thread
From: Simon Marchi @ 2017-11-24  3:17 UTC (permalink / raw)
  To: Pedro Alves, Simon Marchi, Tom Tromey, gdb-patches

On 2017-11-23 05:40 PM, Pedro Alves wrote:
> On 11/23/2017 09:13 PM, Simon Marchi wrote:
> 
>> I have a patch in some branch that does essentially the same thing, so
>> I was able to compare our approaches.  In my version, I removed the
>> big allocation that is shared among pieces, and made each piece have
>> its own std::string.  Unless we want to keep the current allocation
>> scheme for performance/memory usage reasons, I think that using
>> std::strings simplifies things in the parse_format_string function.
>> The format_pieces structure is replaced with an std::vector of
>> format_piece.
> 
> Sounds like a step backwards to me.  If it simplifies things, then
> it sounds like it might be because we're missing some utility,
> like string_view or something like that.

I am not sure I understand, can you expand a little bit about what you
have in mind?

What I meant is that is changes things like this:

	    strncpy (current_substring, percent_loc, length_before_ll);
	    strcpy (current_substring + length_before_ll, "I64");
	    current_substring[length_before_ll + 3] =
	      percent_loc[length_before_ll + lcount];
	    current_substring += length_before_ll + 4;

into this

	    piece_string.assign (percent_loc, length_before_ll);
	    piece_string += "I64";
	    piece_string += percent_loc[length_before_ll + lcount];

Less magical number, less playing with offsets, etc.

Simon

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFA] C++-ify parse_format_string
  2017-11-24  3:17     ` Simon Marchi
@ 2017-11-24 12:54       ` Pedro Alves
  2017-11-24 16:26         ` Simon Marchi
  2017-11-25 21:25         ` Tom Tromey
  0 siblings, 2 replies; 11+ messages in thread
From: Pedro Alves @ 2017-11-24 12:54 UTC (permalink / raw)
  To: Simon Marchi, Simon Marchi, Tom Tromey, gdb-patches

On 11/24/2017 03:17 AM, Simon Marchi wrote:
> On 2017-11-23 05:40 PM, Pedro Alves wrote:
>> On 11/23/2017 09:13 PM, Simon Marchi wrote:
>>
>>> I have a patch in some branch that does essentially the same thing, so
>>> I was able to compare our approaches.  In my version, I removed the
>>> big allocation that is shared among pieces, and made each piece have
>>> its own std::string.  Unless we want to keep the current allocation
>>> scheme for performance/memory usage reasons, I think that using
>>> std::strings simplifies things in the parse_format_string function.
>>> The format_pieces structure is replaced with an std::vector of
>>> format_piece.
>>
>> Sounds like a step backwards to me.  If it simplifies things, then
>> it sounds like it might be because we're missing some utility,
>> like string_view or something like that.
> 
> I am not sure I understand, can you expand a little bit about what you
> have in mind?

I call it a step backwards because simplification is done at the
cost of efficiency (individual heap allocations, changing the
ownership model), when the original code avoided that.  It's well known that
standard C++ is missing basic string manipulation utilities, but that's
no reason to force ourselves to consider std::string the ultimate tool, IMO.
There's C++ the library, and then there's the C++ the language.  We can't
do anything about the latter, but we can extend the former.  Later revisions
of the standard are adding more facilities, like std::string_view in C++17, and
now things like std::array_view / std::span (from gsl::span) have a very
good chance of making their way into C++20.  Many C++ projects end up
reinventing something like those types, hence the attempts at
standardization, making those types lingua franca for non-owning use cases.

> 
> What I meant is that is changes things like this:
> 
> 	    strncpy (current_substring, percent_loc, length_before_ll);
> 	    strcpy (current_substring + length_before_ll, "I64");
> 	    current_substring[length_before_ll + 3] =
> 	      percent_loc[length_before_ll + lcount];
> 	    current_substring += length_before_ll + 4;
> 
> into this
> 
> 	    piece_string.assign (percent_loc, length_before_ll);
> 	    piece_string += "I64";
> 	    piece_string += percent_loc[length_before_ll + lcount];
> 
> Less magical number, less playing with offsets, etc.

I meant that you don't need for each piece_string to own its
own deep copy of the string to get rid of the magical numbers.
You can get that even with a couple C-like free-functions, like:

 // memcpy, null-terminate, and return advanced pointer.
 char *append (char *p, const char *str);
 char *append (char *p, const char *str, size_t len);

and then:

 char *p = current_substring;

 p = append (p, percent_loc, length_before_ll);
 p = append (p, "I64");
 p = append (p, percent_loc[length_before_ll + lcount]);

 current_substring = p;

You could wrap that in a class, like:

 /* Helper to build an null-terminated C string.  */
 struct zstring_builder
 {
    /* POS is a pointer to the position in buffer to work on.  */
    explicit zstring_builder (char *pos)
      : m_pos (pos)
    {}
 
    // memcpy, advance m_pos, null-terminate.
    zstring_builder &append (const char *s, size_type count);
    zstring_builder &append (const char* s);
 
    char *pos () { return m_pos; }
 
 private:
    char *m_pos;
 };

Or:

 /* Helper to build a null-terminated C string on top
    of a vector.  */
 struct zstring_builder
 {
    /* POS is a pointer to the position in buffer to work on.  */
    explicit zstring_builder (std::vector<char> &buf, size_t pos);
 
    // memcpy, advance m_pos, null-terminate.
    zstring_builder &append (const char *s, size_type count);
    zstring_builder &append (const char* s);
 
    char *pos () { return &m_vector[m_pos]; }
 
 private:
    std::vector &m_buf;
    size_t m_pos;
 };

or something else along those lines, if you now make piece_string
a zstring_builder, you can get similar, and just as readable code:

            zstring_builder piece_string (current_substring);

            piece_string.append (percent_loc, length_before_ll);
 	    piece_string.append ("I64");
 	    piece_string.append (percent_loc[length_before_ll + lcount]);

            current_substring = piece_string.pos ();

Alternatively, we could have something like a zstring_ref
class [1], as a non-owning mutable view of a C string (that never
reallocates), guaranteed to be null-terminated, and then the code
you shown would be just exactly the same.  Open choices could be whether
to save the string size inside zstring_ref (like string_view), or
not saving a size, making it a really-thin wrapper around a
pointer instead. 

std::string is great as an _owning_ SSO-enabled (which is sometimes a
curse when embedded in structures...) container of characters.  But
when we don't want an owning container, the right tool / refactor can
still make the code just as readable.

[1] - I've thought several times because that something like a zstring_view
(like string_view, but guarantees null-termination) would be handy in
a lot of places.  string_view isn't mutable, though, hence the
alternative name instead.  could be mut_string_view too, for example.

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFA] C++-ify parse_format_string
  2017-11-24 12:54       ` Pedro Alves
@ 2017-11-24 16:26         ` Simon Marchi
  2017-11-25 21:25         ` Tom Tromey
  1 sibling, 0 replies; 11+ messages in thread
From: Simon Marchi @ 2017-11-24 16:26 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Simon Marchi, Tom Tromey, gdb-patches

On 2017-11-24 07:54, Pedro Alves wrote:
> On 11/24/2017 03:17 AM, Simon Marchi wrote:
>> On 2017-11-23 05:40 PM, Pedro Alves wrote:
>>> On 11/23/2017 09:13 PM, Simon Marchi wrote:
>>> 
>>>> I have a patch in some branch that does essentially the same thing, 
>>>> so
>>>> I was able to compare our approaches.  In my version, I removed the
>>>> big allocation that is shared among pieces, and made each piece have
>>>> its own std::string.  Unless we want to keep the current allocation
>>>> scheme for performance/memory usage reasons, I think that using
>>>> std::strings simplifies things in the parse_format_string function.
>>>> The format_pieces structure is replaced with an std::vector of
>>>> format_piece.
>>> 
>>> Sounds like a step backwards to me.  If it simplifies things, then
>>> it sounds like it might be because we're missing some utility,
>>> like string_view or something like that.
>> 
>> I am not sure I understand, can you expand a little bit about what you
>> have in mind?
> 
> I call it a step backwards because simplification is done at the
> cost of efficiency (individual heap allocations, changing the
> ownership model), when the original code avoided that.  It's well known 
> that
> standard C++ is missing basic string manipulation utilities, but that's
> no reason to force ourselves to consider std::string the ultimate tool, 
> IMO.
> There's C++ the library, and then there's the C++ the language.  We 
> can't
> do anything about the latter, but we can extend the former.  Later 
> revisions
> of the standard are adding more facilities, like std::string_view in 
> C++17, and
> now things like std::array_view / std::span (from gsl::span) have a 
> very
> good chance of making their way into C++20.  Many C++ projects end up
> reinventing something like those types, hence the attempts at
> standardization, making those types lingua franca for non-owning use 
> cases.

Thanks for the detailed reply!

In my original reply, I said "Unless we want to keep the current 
allocation scheme for performance/memory usage reasons", removing the 
common allocation was done consciously.  My thought was that the single 
allocation scheme had not been chosen for performance reason, but for 
simplicity.  Computing how much to malloc for every piece would have 
been very annoying.  Making each piece own its string made the code 
easier to reason about, but if the performance hit is not acceptable, I 
understand.

>> What I meant is that is changes things like this:
>> 
>> 	    strncpy (current_substring, percent_loc, length_before_ll);
>> 	    strcpy (current_substring + length_before_ll, "I64");
>> 	    current_substring[length_before_ll + 3] =
>> 	      percent_loc[length_before_ll + lcount];
>> 	    current_substring += length_before_ll + 4;
>> 
>> into this
>> 
>> 	    piece_string.assign (percent_loc, length_before_ll);
>> 	    piece_string += "I64";
>> 	    piece_string += percent_loc[length_before_ll + lcount];
>> 
>> Less magical number, less playing with offsets, etc.
> 
> I meant that you don't need for each piece_string to own its
> own deep copy of the string to get rid of the magical numbers.
> You can get that even with a couple C-like free-functions, like:
> 
>  // memcpy, null-terminate, and return advanced pointer.
>  char *append (char *p, const char *str);
>  char *append (char *p, const char *str, size_t len);
> 
> and then:
> 
>  char *p = current_substring;
> 
>  p = append (p, percent_loc, length_before_ll);
>  p = append (p, "I64");
>  p = append (p, percent_loc[length_before_ll + lcount]);
> 
>  current_substring = p;
> 
> You could wrap that in a class, like:
> 
>  /* Helper to build an null-terminated C string.  */
>  struct zstring_builder
>  {
>     /* POS is a pointer to the position in buffer to work on.  */
>     explicit zstring_builder (char *pos)
>       : m_pos (pos)
>     {}
> 
>     // memcpy, advance m_pos, null-terminate.
>     zstring_builder &append (const char *s, size_type count);
>     zstring_builder &append (const char* s);
> 
>     char *pos () { return m_pos; }
> 
>  private:
>     char *m_pos;
>  };
> 
> Or:
> 
>  /* Helper to build a null-terminated C string on top
>     of a vector.  */
>  struct zstring_builder
>  {
>     /* POS is a pointer to the position in buffer to work on.  */
>     explicit zstring_builder (std::vector<char> &buf, size_t pos);
> 
>     // memcpy, advance m_pos, null-terminate.
>     zstring_builder &append (const char *s, size_type count);
>     zstring_builder &append (const char* s);
> 
>     char *pos () { return &m_vector[m_pos]; }
> 
>  private:
>     std::vector &m_buf;
>     size_t m_pos;
>  };

I agree that something like this is good if we want to keep the current 
allocation scheme but make the code clearer.  Since "append" continues 
the same string piece, I think we would need some kind of "finish" 
method (a bit like obstack) to be able to start a new string:

   zstring_builder builder (buf);
   builder.append ("Hello ");
   builder.append ("world!");
   const char *english = builder.finish ();
   builder.append ("Bonjour ");
   builder.append ("monde!");
   const char *french = builder.finish ();

giving something like this:

   Hello world!\0Bonjour monde!\0

> or something else along those lines, if you now make piece_string
> a zstring_builder, you can get similar, and just as readable code:
> 
>             zstring_builder piece_string (current_substring);
> 
>             piece_string.append (percent_loc, length_before_ll);
>  	    piece_string.append ("I64");
>  	    piece_string.append (percent_loc[length_before_ll + lcount]);
> 
>             current_substring = piece_string.pos ();
> 
> Alternatively, we could have something like a zstring_ref
> class [1], as a non-owning mutable view of a C string (that never
> reallocates), guaranteed to be null-terminated, and then the code
> you shown would be just exactly the same.  Open choices could be 
> whether
> to save the string size inside zstring_ref (like string_view), or
> not saving a size, making it a really-thin wrapper around a
> pointer instead.
> 
> std::string is great as an _owning_ SSO-enabled (which is sometimes a
> curse when embedded in structures...) container of characters.  But
> when we don't want an owning container, the right tool / refactor can
> still make the code just as readable.
> 
> [1] - I've thought several times because that something like a 
> zstring_view
> (like string_view, but guarantees null-termination) would be handy in
> a lot of places.  string_view isn't mutable, though, hence the
> alternative name instead.  could be mut_string_view too, for example.

It's all performance/complexity tradeoffs, as usual :)

Simon

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFA] C++-ify parse_format_string
  2017-11-24 12:54       ` Pedro Alves
  2017-11-24 16:26         ` Simon Marchi
@ 2017-11-25 21:25         ` Tom Tromey
  2017-12-02 20:31           ` [PATCH] " Simon Marchi
  1 sibling, 1 reply; 11+ messages in thread
From: Tom Tromey @ 2017-11-25 21:25 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Simon Marchi, Simon Marchi, Tom Tromey, gdb-patches

>>>>> "Pedro" == Pedro Alves <palves@redhat.com> writes:

Pedro> I call it a step backwards because simplification is done at the
Pedro> cost of efficiency (individual heap allocations, changing the
Pedro> ownership model), when the original code avoided that.

I agree with this principle in general, but I think that in this
particular case, the efficiency and/or allocation argument is not very
strong -- I just doubt this code is space- or time-sensitive.

So on the whole I think in this case it would be fine to use Simon's
patch.

Pedro> [1] - I've thought several times because that something like a zstring_view
Pedro> (like string_view, but guarantees null-termination) would be handy in
Pedro> a lot of places.  string_view isn't mutable, though, hence the
Pedro> alternative name instead.  could be mut_string_view too, for example.

In gdb I've often wanted something like "std::string API but
unique_xmalloc_ptr allocation policy".  That way one could get the handy
string methods but allow nicer interfacing to things already using
xmalloc (I guess primarily libiberty but also readline and sometimes
BFD).

The main danger I see of views is that then one must reason about
lifetimes in order to use them.  But in gdb maybe this can be handled
via some relatively simple rules, like "never store a view on the heap".

Tom

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] C++-ify parse_format_string
  2017-11-25 21:25         ` Tom Tromey
@ 2017-12-02 20:31           ` Simon Marchi
  2017-12-03 14:12             ` Pedro Alves
  0 siblings, 1 reply; 11+ messages in thread
From: Simon Marchi @ 2017-12-02 20:31 UTC (permalink / raw)
  To: gdb-patches; +Cc: Tom Tromey, Pedro Alves, Simon Marchi

From: Simon Marchi <simon.marchi@ericsson.com>

I finally got around to polish this patch.  Let me know what you think.

This patch C++ifies the parse_format_string function and its return
value, allowing to get rid of free_format_pieces_cleanup.  Currently,
format_piece::string points into a big buffer shared by all format
pieces resulting of the format string parse.  To make the code and
allocation scheme simpler to understand, this patch makes each piece own
its string with an std::string.

I've tried to measure the impact of this change on the time taken by
parse_format_string in an optimized build.  I ran

  $ perf record -g -- ./gdb -nx -batch -x test.gdb

with test.gdb:

  set $a = 0
  while $a < 20000
    printf "%daaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa<16 times>\n", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    set $a = $a + 1
  end

The purpose of the long string of a's is to have some format pieces for
which the substring doesn't fit in the std::string local buffer.  The
command "perf report" shows this for current master:

  1.81%     1.81%  gdb  gdb  [.] parse_format_string

and this with the patch:

  1.69%     1.69%  gdb  gdb  [.] parse_format_string

So it doesn't seem like it makes much of a difference.  If you have tips
on how to use perf to get more precise measurements, I would be glad to
hear them.

gdb/ChangeLog:

	* common/format.h (struct format_piece): Add constructor.
	<string>: Change type to std::string.
	(parse_format_string): Return an std::vector of format_piece.
	(free_format_pieces): Remove.
	(free_format_pieces_cleanup): Remove.
	* common/format.c (parse_format_string): Return an std::vector of
	format_piece.  Don't allocate a big buffer, adjust to those
	changes.
	(free_format_pieces): Remove.
	(free_format_pieces_cleanup): Remove.
	* ax-gdb.c (gen_printf): Remove format_piece * parameter.
	(maint_agent_printf_command): Adjust to parse_format_string
	changes.
	* ax-gdb.h (struct format_piece): Don't forward-declare.
	(gen_printf): Remove format_piece * parameter.
	* breakpoint.c (parse_cmd_to_aexpr): Adjust to
	parse_format_string chagnes.
	* printcmd.c (ui_printf): Likewise.

gdb/gdbserver/ChangeLog:

	* ax.c (ax_printf): Adjust to parse_format_string changes.
---
 gdb/ax-gdb.c        | 22 ++++--------
 gdb/ax-gdb.h        |  2 --
 gdb/breakpoint.c    | 24 ++++---------
 gdb/common/format.c | 99 +++++++----------------------------------------------
 gdb/common/format.h | 19 ++++------
 gdb/gdbserver/ax.c  | 34 +++++++++---------
 gdb/printcmd.c      | 41 ++++++++++------------
 7 files changed, 67 insertions(+), 174 deletions(-)

diff --git a/gdb/ax-gdb.c b/gdb/ax-gdb.c
index 5027f6a464..6c0d0f29f9 100644
--- a/gdb/ax-gdb.c
+++ b/gdb/ax-gdb.c
@@ -2541,7 +2541,6 @@ agent_expr_up
 gen_printf (CORE_ADDR scope, struct gdbarch *gdbarch,
 	    CORE_ADDR function, LONGEST channel,
 	    const char *format, int fmtlen,
-	    struct format_piece *frags,
 	    int nargs, struct expression **exprs)
 {
   agent_expr_up ax (new agent_expr (gdbarch, scope));
@@ -2681,12 +2680,7 @@ agent_eval_command (const char *exp, int from_tty)
 static void
 maint_agent_printf_command (const char *cmdrest, int from_tty)
 {
-  struct cleanup *old_chain = 0;
-  struct expression *argvec[100];
   struct frame_info *fi = get_current_frame ();	/* need current scope */
-  const char *format_start, *format_end;
-  struct format_piece *fpieces;
-  int nargs;
 
   /* We don't deal with overlay debugging at the moment.  We need to
      think more carefully about this.  If you copy this code into
@@ -2703,13 +2697,11 @@ maint_agent_printf_command (const char *cmdrest, int from_tty)
   if (*cmdrest++ != '"')
     error (_("Must start with a format string."));
 
-  format_start = cmdrest;
+  const char *format_start = cmdrest;
 
-  fpieces = parse_format_string (&cmdrest);
+  parse_format_string (&cmdrest);
 
-  old_chain = make_cleanup (free_format_pieces_cleanup, &fpieces);
-
-  format_end = cmdrest;
+  const char *format_end = cmdrest;
 
   if (*cmdrest++ != '"')
     error (_("Bad format string, non-terminated '\"'."));
@@ -2723,15 +2715,14 @@ maint_agent_printf_command (const char *cmdrest, int from_tty)
     cmdrest++;
   cmdrest = skip_spaces (cmdrest);
 
-  nargs = 0;
+  std::vector<struct expression *> argvec;
   while (*cmdrest != '\0')
     {
       const char *cmd1;
 
       cmd1 = cmdrest;
       expression_up expr = parse_exp_1 (&cmd1, 0, (struct block *) 0, 1);
-      argvec[nargs] = expr.release ();
-      ++nargs;
+      argvec.push_back (expr.release ());
       cmdrest = cmd1;
       if (*cmdrest == ',')
 	++cmdrest;
@@ -2742,14 +2733,13 @@ maint_agent_printf_command (const char *cmdrest, int from_tty)
   agent_expr_up agent = gen_printf (get_frame_pc (fi), get_current_arch (),
 				    0, 0,
 				    format_start, format_end - format_start,
-				    fpieces, nargs, argvec);
+				    argvec.size (), argvec.data ());
   ax_reqs (agent.get ());
   ax_print (gdb_stdout, agent.get ());
 
   /* It would be nice to call ax_reqs here to gather some general info
      about the expression, and then print out the result.  */
 
-  do_cleanups (old_chain);
   dont_repeat ();
 }
 
diff --git a/gdb/ax-gdb.h b/gdb/ax-gdb.h
index 8b5ab46c66..834ddffe05 100644
--- a/gdb/ax-gdb.h
+++ b/gdb/ax-gdb.h
@@ -120,10 +120,8 @@ extern void gen_expr (struct expression *exp, union exp_element **pc,
 
 extern void require_rvalue (struct agent_expr *ax, struct axs_value *value);
 
-struct format_piece;
 extern agent_expr_up gen_printf (CORE_ADDR, struct gdbarch *,
 				 CORE_ADDR, LONGEST, const char *, int,
-				 struct format_piece *,
 				 int, struct expression **);
 
 #endif /* AX_GDB_H */
diff --git a/gdb/breakpoint.c b/gdb/breakpoint.c
index d4d095d87e..065ac002f7 100644
--- a/gdb/breakpoint.c
+++ b/gdb/breakpoint.c
@@ -2230,12 +2230,7 @@ build_target_condition_list (struct bp_location *bl)
 static agent_expr_up
 parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
 {
-  struct cleanup *old_cleanups = 0;
-  struct expression **argvec;
   const char *cmdrest;
-  const char *format_start, *format_end;
-  struct format_piece *fpieces;
-  int nargs;
   struct gdbarch *gdbarch = get_current_arch ();
 
   if (cmd == NULL)
@@ -2250,13 +2245,11 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
   if (*cmdrest++ != '"')
     error (_("No format string following the location"));
 
-  format_start = cmdrest;
+  const char *format_start = cmdrest;
 
-  fpieces = parse_format_string (&cmdrest);
+  parse_format_string (&cmdrest);
 
-  old_cleanups = make_cleanup (free_format_pieces_cleanup, &fpieces);
-
-  format_end = cmdrest;
+  const char *format_end = cmdrest;
 
   if (*cmdrest++ != '"')
     error (_("Bad format string, non-terminated '\"'."));
@@ -2272,17 +2265,14 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
 
   /* For each argument, make an expression.  */
 
-  argvec = (struct expression **) alloca (strlen (cmd)
-					 * sizeof (struct expression *));
-
-  nargs = 0;
+  std::vector<struct expression *> argvec;
   while (*cmdrest != '\0')
     {
       const char *cmd1;
 
       cmd1 = cmdrest;
       expression_up expr = parse_exp_1 (&cmd1, scope, block_for_pc (scope), 1);
-      argvec[nargs++] = expr.release ();
+      argvec.push_back (expr.release ());
       cmdrest = cmd1;
       if (*cmdrest == ',')
 	++cmdrest;
@@ -2296,7 +2286,7 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
     {
       aexpr = gen_printf (scope, gdbarch, 0, 0,
 			  format_start, format_end - format_start,
-			  fpieces, nargs, argvec);
+			  argvec.size (), argvec.data ());
     }
   CATCH (ex, RETURN_MASK_ERROR)
     {
@@ -2306,8 +2296,6 @@ parse_cmd_to_aexpr (CORE_ADDR scope, char *cmd)
     }
   END_CATCH
 
-  do_cleanups (old_cleanups);
-
   /* We have a valid agent expression, return it.  */
   return aexpr;
 }
diff --git a/gdb/common/format.c b/gdb/common/format.c
index 8cb15511fa..b0c7268c8f 100644
--- a/gdb/common/format.c
+++ b/gdb/common/format.c
@@ -20,17 +20,14 @@
 #include "common-defs.h"
 #include "format.h"
 
-struct format_piece *
+std::vector<format_piece>
 parse_format_string (const char **arg)
 {
   const char *s;
   char *f, *string;
   const char *prev_start;
   const char *percent_loc;
-  char *sub_start, *current_substring;
-  struct format_piece *pieces;
-  int next_frag;
-  int max_pieces;
+  std::vector<format_piece> pieces;
   enum argclass this_argclass;
 
   s = *arg;
@@ -97,16 +94,6 @@ parse_format_string (const char **arg)
      done with it; it's up to callers to complain about syntax.  */
   *arg = s;
 
-  /* Need extra space for the '\0's.  Doubling the size is sufficient.  */
-
-  current_substring = (char *) xmalloc (strlen (string) * 2 + 1000);
-
-  max_pieces = strlen (string) + 2;
-
-  pieces = XNEWVEC (struct format_piece, max_pieces);
-
-  next_frag = 0;
-
   /* Now scan the string for %-specs and see what kinds of args they want.
      argclass classifies the %-specs so we can give printf-type functions
      something of the right size.  */
@@ -129,15 +116,8 @@ parse_format_string (const char **arg)
 	    continue;
 	  }
 
-	sub_start = current_substring;
-
-	strncpy (current_substring, prev_start, f - 1 - prev_start);
-	current_substring += f - 1 - prev_start;
-	*current_substring++ = '\0';
-
-	pieces[next_frag].string = sub_start;
-	pieces[next_frag].argclass = literal_piece;
-	next_frag++;
+	pieces.emplace_back (std::string (prev_start, f - 1 - prev_start),
+			     literal_piece);
 
 	percent_loc = f - 1;
 
@@ -309,7 +289,7 @@ parse_format_string (const char **arg)
 
 	f++;
 
-	sub_start = current_substring;
+	std::string piece_string;
 
 	if (lcount > 1 && USE_PRINTF_I64)
 	  {
@@ -317,11 +297,9 @@ parse_format_string (const char **arg)
 	       Convert %lld to %I64d.  */
 	    int length_before_ll = f - percent_loc - 1 - lcount;
 
-	    strncpy (current_substring, percent_loc, length_before_ll);
-	    strcpy (current_substring + length_before_ll, "I64");
-	    current_substring[length_before_ll + 3] =
-	      percent_loc[length_before_ll + lcount];
-	    current_substring += length_before_ll + 4;
+	    piece_string.assign (percent_loc, length_before_ll);
+	    piece_string += "I64";
+	    piece_string += percent_loc[length_before_ll + lcount];
 	  }
 	else if (this_argclass == wide_string_arg
 		 || this_argclass == wide_char_arg)
@@ -329,71 +307,20 @@ parse_format_string (const char **arg)
 	    /* Convert %ls or %lc to %s.  */
 	    int length_before_ls = f - percent_loc - 2;
 
-	    strncpy (current_substring, percent_loc, length_before_ls);
-	    strcpy (current_substring + length_before_ls, "s");
-	    current_substring += length_before_ls + 2;
+	    piece_string.assign (percent_loc, length_before_ls);
+	    piece_string += 's';
 	  }
 	else
-	  {
-	    strncpy (current_substring, percent_loc, f - percent_loc);
-	    current_substring += f - percent_loc;
-	  }
-
-	*current_substring++ = '\0';
+	  piece_string.assign (percent_loc, f - percent_loc);
 
 	prev_start = f;
 
-	pieces[next_frag].string = sub_start;
-	pieces[next_frag].argclass = this_argclass;
-	next_frag++;
+	pieces.emplace_back (std::move (piece_string), this_argclass);
       }
 
   /* Record the remainder of the string.  */
 
-  sub_start = current_substring;
-
-  strncpy (current_substring, prev_start, f - prev_start);
-  current_substring += f - prev_start;
-  *current_substring++ = '\0';
-
-  pieces[next_frag].string = sub_start;
-  pieces[next_frag].argclass = literal_piece;
-  next_frag++;
-
-  /* Record an end-of-array marker.  */
-
-  pieces[next_frag].string = NULL;
-  pieces[next_frag].argclass = literal_piece;
+  pieces.emplace_back (std::string (prev_start, f - prev_start), literal_piece);
 
   return pieces;
 }
-
-void
-free_format_pieces (struct format_piece *pieces)
-{
-  if (!pieces)
-    return;
-
-  /* We happen to know that all the string pieces are in the block
-     pointed to by the first string piece.  */
-  if (pieces[0].string)
-    xfree (pieces[0].string);
-
-  xfree (pieces);
-}
-
-void
-free_format_pieces_cleanup (void *ptr)
-{
-  struct format_piece **location = (struct format_piece **) ptr;
-
-  if (location == NULL)
-    return;
-
-  if (*location != NULL)
-    {
-      free_format_pieces (*location);
-      *location = NULL;
-    }
-}
-
diff --git a/gdb/common/format.h b/gdb/common/format.h
index f3a94b8bbb..558f719efe 100644
--- a/gdb/common/format.h
+++ b/gdb/common/format.h
@@ -48,22 +48,17 @@ enum argclass
 
 struct format_piece
 {
-  char *string;
+  format_piece (std::string &&string_, enum argclass argclass_)
+  : string (std::move (string_)), argclass (argclass_)
+  {}
+
+  std::string string;
   enum argclass argclass;
 };
 
-/* Return an array of printf fragments found at the given string, and
+/* Return a vector of printf fragments found at the given string, and
    rewrite ARG with a pointer to the end of the format string.  */
 
-extern struct format_piece *parse_format_string (const char **arg);
-
-/* Given a pointer to an array of format pieces, free any memory that
-   would have been allocated by parse_format_string.  */
-
-extern void free_format_pieces (struct format_piece *frags);
-
-/* Freeing, cast as a cleanup.  */
-
-extern void free_format_pieces_cleanup (void *);
+extern std::vector<format_piece> parse_format_string (const char **arg);
 
 #endif /* COMMON_FORMAT_H */
diff --git a/gdb/gdbserver/ax.c b/gdb/gdbserver/ax.c
index 35ed2c69ad..46a1399e0b 100644
--- a/gdb/gdbserver/ax.c
+++ b/gdb/gdbserver/ax.c
@@ -816,30 +816,29 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 	   int nargs, ULONGEST *args)
 {
   const char *f = format;
-  struct format_piece *fpieces;
-  int i, fp;
-  char *current_substring;
+  int i;
   int nargs_wanted;
 
   ax_debug ("Printf of \"%s\" with %d args", format, nargs);
 
-  fpieces = parse_format_string (&f);
+  std::vector<format_piece> fpieces = parse_format_string (&f);
 
   nargs_wanted = 0;
-  for (fp = 0; fpieces[fp].string != NULL; fp++)
-    if (fpieces[fp].argclass != literal_piece)
+  for (const format_piece &piece : fpieces)
+    if (piece.argclass != literal_piece)
       ++nargs_wanted;
 
   if (nargs != nargs_wanted)
     error (_("Wrong number of arguments for specified format-string"));
 
   i = 0;
-  for (fp = 0; fpieces[fp].string != NULL; fp++)
+  for (const format_piece &piece : fpieces)
     {
-      current_substring = fpieces[fp].string;
+      const char *fmt = piece.string.c_str ();
+
       ax_debug ("current substring is '%s', class is %d",
-		current_substring, fpieces[fp].argclass);
-      switch (fpieces[fp].argclass)
+		fmt, piece.argclass);
+      switch (piece.argclass)
 	{
 	case string_arg:
 	  {
@@ -865,7 +864,7 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 		read_inferior_memory (tem, str, j);
 	      str[j] = 0;
 
-              printf (current_substring, (char *) str);
+              printf (fmt, (char *) str);
 	    }
 	    break;
 
@@ -874,7 +873,7 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 	    {
 	      long long val = args[i];
 
-              printf (current_substring, val);
+              printf (fmt, val);
 	      break;
 	    }
 #else
@@ -884,7 +883,7 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 	  {
 	    int val = args[i];
 
-	    printf (current_substring, val);
+	    printf (fmt, val);
 	    break;
 	  }
 
@@ -892,7 +891,7 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 	  {
 	    long val = args[i];
 
-	    printf (current_substring, val);
+	    printf (fmt, val);
 	    break;
 	  }
 
@@ -905,20 +904,19 @@ ax_printf (CORE_ADDR fn, CORE_ADDR chan, const char *format,
 	     have modified GCC to include -Wformat-security by
 	     default, which will warn here if there is no
 	     argument.  */
-	  printf (current_substring, 0);
+	  printf (fmt, 0);
 	  break;
 
 	default:
 	  error (_("Format directive in '%s' not supported in agent printf"),
-		 current_substring);
+		 fmt);
 	}
 
       /* Maybe advance to the next argument.  */
-      if (fpieces[fp].argclass != literal_piece)
+      if (piece.argclass != literal_piece)
 	++i;
     }
 
-  free_format_pieces (fpieces);
   fflush (stdout);
 }
 
diff --git a/gdb/printcmd.c b/gdb/printcmd.c
index 5d4417fa06..850d5ec9de 100644
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@@ -2427,7 +2427,6 @@ printf_pointer (struct ui_file *stream, const char *format,
 static void
 ui_printf (const char *arg, struct ui_file *stream)
 {
-  struct format_piece *fpieces;
   const char *s = arg;
   struct value **val_args;
   int allocated_args = 20;
@@ -2447,9 +2446,7 @@ ui_printf (const char *arg, struct ui_file *stream)
   if (*s++ != '"')
     error (_("Bad format string, missing '\"'."));
 
-  fpieces = parse_format_string (&s);
-
-  make_cleanup (free_format_pieces_cleanup, &fpieces);
+  std::vector<format_piece> fpieces = parse_format_string (&s);
 
   if (*s++ != '"')
     error (_("Bad format string, non-terminated '\"'."));
@@ -2466,12 +2463,11 @@ ui_printf (const char *arg, struct ui_file *stream)
   {
     int nargs = 0;
     int nargs_wanted;
-    int i, fr;
-    char *current_substring;
+    int i;
 
     nargs_wanted = 0;
-    for (fr = 0; fpieces[fr].string != NULL; fr++)
-      if (fpieces[fr].argclass != literal_piece)
+    for (const format_piece &piece : fpieces)
+      if (piece.argclass != literal_piece)
 	++nargs_wanted;
 
     /* Now, parse all arguments and evaluate them.
@@ -2499,16 +2495,17 @@ ui_printf (const char *arg, struct ui_file *stream)
 
     /* Now actually print them.  */
     i = 0;
-    for (fr = 0; fpieces[fr].string != NULL; fr++)
+    for (const format_piece &piece : fpieces)
       {
-	current_substring = fpieces[fr].string;
-	switch (fpieces[fr].argclass)
+	const char *fmt = piece.string.c_str ();
+
+	switch (piece.argclass)
 	  {
 	  case string_arg:
-	    printf_c_string (stream, current_substring, val_args[i]);
+	    printf_c_string (stream, fmt, val_args[i]);
 	    break;
 	  case wide_string_arg:
-	    printf_wide_c_string (stream, current_substring, val_args[i]);
+	    printf_wide_c_string (stream, fmt, val_args[i]);
 	    break;
 	  case wide_char_arg:
 	    {
@@ -2535,7 +2532,7 @@ ui_printf (const char *arg, struct ui_file *stream)
 					 &output, translit_char);
 	      obstack_grow_str0 (&output, "");
 
-	      fprintf_filtered (stream, current_substring,
+	      fprintf_filtered (stream, fmt,
                                 obstack_base (&output));
 	    }
 	    break;
@@ -2544,7 +2541,7 @@ ui_printf (const char *arg, struct ui_file *stream)
 	    {
 	      long long val = value_as_long (val_args[i]);
 
-              fprintf_filtered (stream, current_substring, val);
+              fprintf_filtered (stream, fmt, val);
 	      break;
 	    }
 #else
@@ -2554,14 +2551,14 @@ ui_printf (const char *arg, struct ui_file *stream)
 	    {
 	      int val = value_as_long (val_args[i]);
 
-              fprintf_filtered (stream, current_substring, val);
+              fprintf_filtered (stream, fmt, val);
 	      break;
 	    }
 	  case long_arg:
 	    {
 	      long val = value_as_long (val_args[i]);
 
-              fprintf_filtered (stream, current_substring, val);
+              fprintf_filtered (stream, fmt, val);
 	      break;
 	    }
 	  /* Handles floating-point values.  */
@@ -2570,11 +2567,11 @@ ui_printf (const char *arg, struct ui_file *stream)
 	  case dec32float_arg:
 	  case dec64float_arg:
 	  case dec128float_arg:
-	    printf_floating (stream, current_substring, val_args[i],
-			     fpieces[fr].argclass);
+	    printf_floating (stream, fmt, val_args[i],
+			     piece.argclass);
 	    break;
 	  case ptr_arg:
-	    printf_pointer (stream, current_substring, val_args[i]);
+	    printf_pointer (stream, fmt, val_args[i]);
 	    break;
 	  case literal_piece:
 	    /* Print a portion of the format string that has no
@@ -2585,14 +2582,14 @@ ui_printf (const char *arg, struct ui_file *stream)
 	       have modified GCC to include -Wformat-security by
 	       default, which will warn here if there is no
 	       argument.  */
-	    fprintf_filtered (stream, current_substring, 0);
+	    fprintf_filtered (stream, fmt, 0);
 	    break;
 	  default:
 	    internal_error (__FILE__, __LINE__,
 			    _("failed internal consistency check"));
 	  }
 	/* Maybe advance to the next argument.  */
-	if (fpieces[fr].argclass != literal_piece)
+	if (piece.argclass != literal_piece)
 	  ++i;
       }
   }
-- 
2.15.0

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] C++-ify parse_format_string
  2017-12-02 20:31           ` [PATCH] " Simon Marchi
@ 2017-12-03 14:12             ` Pedro Alves
  2017-12-03 17:50               ` Simon Marchi
  0 siblings, 1 reply; 11+ messages in thread
From: Pedro Alves @ 2017-12-03 14:12 UTC (permalink / raw)
  To: Simon Marchi, gdb-patches; +Cc: Tom Tromey, Simon Marchi

On 12/02/2017 08:31 PM, Simon Marchi wrote:
> From: Simon Marchi <simon.marchi@ericsson.com>
> 
> I finally got around to polish this patch.  Let me know what you think.
> 
> This patch C++ifies the parse_format_string function and its return
> value, allowing to get rid of free_format_pieces_cleanup.  Currently,
> format_piece::string points into a big buffer shared by all format
> pieces resulting of the format string parse.  To make the code and
> allocation scheme simpler to understand, this patch makes each piece own
> its string with an std::string.
> 
> I've tried to measure the impact of this change on the time taken by
> parse_format_string in an optimized build.  I ran
> 
>   $ perf record -g -- ./gdb -nx -batch -x test.gdb

I like using:

 $ perf record --call-graph dwarf,65528 -- ....

so that perf report has more accurate callchain info.

Otherwise, compile with -fno-omit-frame-pointer (but that's no
longer testing what you normally compile to).

> 
> with test.gdb:
> 
>   set $a = 0
>   while $a < 20000
>     printf "%daaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa<16 times>\n", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
>     set $a = $a + 1
>   end
> 
> The purpose of the long string of a's is to have some format pieces for
> which the substring doesn't fit in the std::string local buffer.  The
> command "perf report" shows this for current master:
> 
>   1.81%     1.81%  gdb  gdb  [.] parse_format_string
> 
> and this with the patch:
> 
>   1.69%     1.69%  gdb  gdb  [.] parse_format_string
> 
> So it doesn't seem like it makes much of a difference.  If you have tips
> on how to use perf to get more precise measurements, I would be glad to
> hear them.

Use plain "time" first.  (You may have shifted work
out of parse_format_string.)

With your test script as is, I get around:

 real    0m1.053s
 user    0m0.985s
 sys     0m0.068s

I like a testcase that runs a bit longer in order to have a
better signal/noise ratio.  IMO 1 second is too short.  Bumping the
number of iterations 10x (to 200000), I get (representative
of a few runs),

before patch:

 real    0m9.432s
 user    0m8.978s
 sys     0m0.444s

after patch:

 real    0m10.322s
 user    0m9.854s
 sys     0m0.454s

there's some variation across runs, but after-patch compared
to before-patch is consistently around 7%-11% slower, for me.

Looking at "perf report -g", we see we're very inefficient
while handling this specific use case, spending a lot of time
in the C parser and in fprintf_filtered.  If those paths
were tuned a bit the relative slowdown of parse_format_string
would probably be more pronounced.

Note that parse_format_string is used to implement target-side
dprintf, in gdbserver/ax.c:ax_printf.  This means that a slow
down here affects inferior execution time too, which may be
a less-contrived scenario.

My point is still the same as ever -- it's not possible
upfront to envision all the use cases users throw at us,
given gdb scripting, etc.  So if easy, I think it's better
to avoid premature pessimization:

 https://sourceware.org/ml/gdb-patches/2017-06/msg00506.html

(Note: I have not really looked at the patch in any detail.)

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] C++-ify parse_format_string
  2017-12-03 14:12             ` Pedro Alves
@ 2017-12-03 17:50               ` Simon Marchi
  2017-12-08 16:22                 ` Tom Tromey
  0 siblings, 1 reply; 11+ messages in thread
From: Simon Marchi @ 2017-12-03 17:50 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches, Tom Tromey, Simon Marchi

On 2017-12-03 09:12, Pedro Alves wrote:
> Use plain "time" first.  (You may have shifted work
> out of parse_format_string.)
> 
> With your test script as is, I get around:
> 
>  real    0m1.053s
>  user    0m0.985s
>  sys     0m0.068s
> 
> I like a testcase that runs a bit longer in order to have a
> better signal/noise ratio.  IMO 1 second is too short.  Bumping the
> number of iterations 10x (to 200000), I get (representative
> of a few runs),
> 
> before patch:
> 
>  real    0m9.432s
>  user    0m8.978s
>  sys     0m0.444s
> 
> after patch:
> 
>  real    0m10.322s
>  user    0m9.854s
>  sys     0m0.454s
> 
> there's some variation across runs, but after-patch compared
> to before-patch is consistently around 7%-11% slower, for me.

Indeed, I see similar results (except 200000 iterations take 30 seconds 
for me with my 10 years old CPU :)).

> Looking at "perf report -g", we see we're very inefficient
> while handling this specific use case, spending a lot of time
> in the C parser and in fprintf_filtered.  If those paths
> were tuned a bit the relative slowdown of parse_format_string
> would probably be more pronounced.
> 
> Note that parse_format_string is used to implement target-side
> dprintf, in gdbserver/ax.c:ax_printf.  This means that a slow
> down here affects inferior execution time too, which may be
> a less-contrived scenario.

I agree that we should not make parse_format_string slow just for fun, 
but in this particular case I think we should avoid parsing the format 
string on the fast path.  It should be possible to chew it to pieces 
earlier (when installing the breakpoint) and just access the pieces when 
we hit the dprintf.

> My point is still the same as ever -- it's not possible
> upfront to envision all the use cases users throw at us,
> given gdb scripting, etc.  So if easy, I think it's better
> to avoid premature pessimization:
> 
>  https://sourceware.org/ml/gdb-patches/2017-06/msg00506.html

The article you link defines premature pessimization as "when 
equivalently complex code would be faster".  In this case, the code is 
more complex (though maybe not by much) with a shared buffer, and I was 
wondering if that complexity was really worth the speedup, especially 
for something not expected to be on a fast path.  But since it's already 
implemented like this and we know it's faster, I agree it feels like a 
step backwards to remove it.  So I have no problem going with Tom's 
patch.

Simon

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] C++-ify parse_format_string
  2017-12-03 17:50               ` Simon Marchi
@ 2017-12-08 16:22                 ` Tom Tromey
  0 siblings, 0 replies; 11+ messages in thread
From: Tom Tromey @ 2017-12-08 16:22 UTC (permalink / raw)
  To: Simon Marchi; +Cc: Pedro Alves, gdb-patches, Tom Tromey, Simon Marchi

Simon> So I have no problem going with Tom's patch.

Thanks for following up on this.
I'll push it soon.

Tom

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-12-08 16:22 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-23 16:46 [RFA] C++-ify parse_format_string Tom Tromey
2017-11-23 21:14 ` Simon Marchi
2017-11-23 22:40   ` Pedro Alves
2017-11-24  3:17     ` Simon Marchi
2017-11-24 12:54       ` Pedro Alves
2017-11-24 16:26         ` Simon Marchi
2017-11-25 21:25         ` Tom Tromey
2017-12-02 20:31           ` [PATCH] " Simon Marchi
2017-12-03 14:12             ` Pedro Alves
2017-12-03 17:50               ` Simon Marchi
2017-12-08 16:22                 ` Tom Tromey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).