public inbox for archer@sourceware.org
 help / color / mirror / Atom feed
* [python][patch] Add options length parameter to value.string(...)
@ 2009-04-06 16:54 Phil Muldoon
  2009-04-06 22:14 ` Tom Tromey
  0 siblings, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-06 16:54 UTC (permalink / raw)
  To: Project Archer

[-- Attachment #1: Type: text/plain, Size: 869 bytes --]

This patch adds an optional length parameter to the value.string() 
method.  This allows partial string fetches, and string fetches that 
ignore embedded null characters in the C/C++ string case.  If the length 
parameter is omitted, the method will maintain the existing behaviour of 
fetching a string until a terminating null in the C/C++ case.

Regards

Phil


ChangeLog

2009-04-06  Phil Muldoon  <pmuldoon@redhat.com>

    * python/python-value.c (valpy_string): Parse length keyword. Use
    length keyword in LA_GET_STRING.
    * c-lang.c (c_get_string): If the length parameter is specified,
    use that.

testsuite/ChangeLog

2009-04-06  Phil Muldoon  <pmuldoon@redhat.com>

    * gdb.python/python-value.exp (test_value_in_inferior): Add
    variable length string fetch tests.
    * gdb.python/python-value.c (main): Add strings for string fetch tests.




[-- Attachment #2: valpy_variable_string_fetch.patch --]
[-- Type: text/x-patch, Size: 4957 bytes --]

diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 8b5410f..344c836 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -249,12 +249,17 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
       int i;
       const gdb_byte *contents = value_contents (value);
 
-      /* Look for a null character.  */
-      for (i = 0; i < fetchlimit; i++)
-	if (extract_unsigned_integer (contents + i * width, width) == 0)
-	  break;
-
-      /* I is now either the number of non-null characters, or FETCHLIMIT.  */
+      /* If a length is specified, use that.  */
+      if ( *length >= 0 )
+	i = *length;
+      else
+	/* Otherwise, look for a null character.  */
+	for (i = 0; i < fetchlimit; i++)
+	  if (extract_unsigned_integer (contents + i * width, width) == 0)
+	    break;
+      
+      /* I is now either a user-defined length, the number of non-null
+	 characters, or FETCHLIMIT.  */      
       *length = i * width;
       *buffer = xmalloc (*length);
       memcpy (*buffer, contents, *length);
@@ -262,7 +267,10 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
     }
   else
     {
-      err = read_string (value_as_address (value), -1, width, fetchlimit,
+      /* Copy length to orig_len, as read_string will write the number
+	 of characters 'read' to length.  */
+      int orig_len = *length;
+      err = read_string (value_as_address (value), orig_len, width, fetchlimit,
 			 buffer, length);
       if (err)
 	{
diff --git a/gdb/python/python-value.c b/gdb/python/python-value.c
index b55463c..a308a7a 100644
--- a/gdb/python/python-value.c
+++ b/gdb/python/python-value.c
@@ -179,7 +179,7 @@ valpy_type (PyObject *self, PyObject *args)
 static PyObject *
 valpy_string (PyObject *self, PyObject *args, PyObject *kw)
 {
-  int length, ret = 0;
+  int length = -1, ret = 0;
   gdb_byte *buffer;
   struct value *value = ((value_object *) self)->value;
   volatile struct gdb_exception except;
@@ -188,10 +188,10 @@ valpy_string (PyObject *self, PyObject *args, PyObject *kw)
   const char *errors = NULL;
   const char *user_encoding = NULL;
   const char *la_encoding = NULL;
-  static char *keywords[] = { "encoding", "errors" };
+  static char *keywords[] = { "encoding", "errors", "length" };
 
-  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ss", keywords,
-				    &user_encoding, &errors))
+  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ssi", keywords,
+				    &user_encoding, &errors, &length))
     return NULL;
 
   TRY_CATCH (except, RETURN_MASK_ALL)
@@ -957,7 +957,7 @@ static PyMethodDef value_object_methods[] = {
   { "dereference", valpy_dereference, METH_NOARGS, "Dereferences the value." },
   { "type", valpy_type, METH_NOARGS, "Return type of the value." },
   { "string", (PyCFunction) valpy_string, METH_VARARGS | METH_KEYWORDS,
-    "string ([encoding] [, errors]) -> string\n\
+    "string ([encoding] [, errors] [, length]) -> string\n\
 Return Unicode string representation of the value." },
   {NULL}  /* Sentinel */
 };
diff --git a/gdb/testsuite/gdb.python/python-value.c b/gdb/testsuite/gdb.python/python-value.c
index 092c520..f3d6284 100644
--- a/gdb/testsuite/gdb.python/python-value.c
+++ b/gdb/testsuite/gdb.python/python-value.c
@@ -44,6 +44,8 @@ main (int argc, char *argv[])
   struct s s;
   union u u;
   PTR x = &s;
+  char st[17] = "divide et impera";
+  char nullst[17] = "divide\0et\0impera";
 
   s.a = 3;
   s.b = 5;
diff --git a/gdb/testsuite/gdb.python/python-value.exp b/gdb/testsuite/gdb.python/python-value.exp
index cbcf91f..9169fc7 100644
--- a/gdb/testsuite/gdb.python/python-value.exp
+++ b/gdb/testsuite/gdb.python/python-value.exp
@@ -234,6 +234,23 @@ proc test_value_in_inferior {} {
 
   # Test address attribute
   gdb_test "python print 'result =', arg0.address" "= 0x\[\[:xdigit:\]\]+" "Test address attribute"
+
+  # Test string fetches,  both partial and whole.
+  gdb_test "print st" "\"divide et impera\""
+  gdb_py_test_silent_cmd "python st = gdb.history (0)" "get value from history" 1
+  gdb_test "python print st.string ()"  "divide et impera"  "Test string with no length"
+  gdb_test "python print st.string (length = -1)" "divide et impera" "Test string (length = -1) is all of the string"
+  gdb_test "python print st.string (length = 6)" "divide"
+  gdb_test "python print st.string (length = 0)" "" "Test string (length = 0) is empty"
+
+  # Fetch a string that has embedded nulls.
+  gdb_test "print nullst" "\"divide\\\\000et\\\\000impera\".*"
+  gdb_py_test_silent_cmd "python nullst = gdb.history (0)" "get value from history" 1
+  gdb_test "python print nullst.string ()" "divide" "Test string to first null"
+  # Python cannot print strings that contain the null (\0) character.
+  # For the purposes of this test, use repr()
+  gdb_py_test_silent_cmd "python nullst = nullst.string (length = 9)" "get string beyond null" 1
+  gdb_test "python print repr(nullst)" "u'divide\\\\x00et'"
 }
 
 proc test_value_after_death {} {

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-06 16:54 [python][patch] Add options length parameter to value.string(...) Phil Muldoon
@ 2009-04-06 22:14 ` Tom Tromey
  2009-04-06 22:33   ` Thiago Jung Bauermann
  2009-04-09 16:12   ` Phil Muldoon
  0 siblings, 2 replies; 17+ messages in thread
From: Tom Tromey @ 2009-04-06 22:14 UTC (permalink / raw)
  To: Phil Muldoon; +Cc: Project Archer

>>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:

Phil> This patch adds an optional length parameter to the
Phil> value.string() method.

This needs a documentation change as well.

Also, please update language.h to document the new meaning of the
length argument to la_get_string.  The comment before c_get_string
also needs this.

Phil> +      if ( *length >= 0 )

Extra spaces after "(" and before ")" here.

Phil> +      /* Copy length to orig_len, as read_string will write the number
Phil> +	 of characters 'read' to length.  */
Phil> +      int orig_len = *length;

You don't need this.  Just pass *length to read_string.  The right
thing will happen.


I think we should not strip a trailing \0 in the case that a length
was requested.

Consider a library that uses counted strings but does not terminate
them with \0.  In this case, it seems to me that it would be
impossible to read such a string with a trailing \0.

In the other case, when the length is found by searching for a \0, it
is still appropriate to strip the trailing \0.

So, please implement this, and make sure this behavior is documented.


The rest looks good to me.

Tom

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-06 22:14 ` Tom Tromey
@ 2009-04-06 22:33   ` Thiago Jung Bauermann
  2009-04-09  9:58     ` Phil Muldoon
  2009-04-09 16:12   ` Phil Muldoon
  1 sibling, 1 reply; 17+ messages in thread
From: Thiago Jung Bauermann @ 2009-04-06 22:33 UTC (permalink / raw)
  To: Tom Tromey; +Cc: Phil Muldoon, Project Archer

El lun, 06-04-2009 a las 16:14 -0600, Tom Tromey escribió:
> >>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:
> The rest looks good to me.

From what we talked on IRC, in this block:

      /* If we know the size of the array, we can use it as a limit on the
         number of characters to be fetched.  */
      if (TYPE_NFIELDS (type) == 1
          && TYPE_CODE (TYPE_FIELD_TYPE (type, 0)) == TYPE_CODE_RANGE)

we want to set *length and not fetchlimit from the array size. But
probably only if *length is -1. If the caller specified a *length
already, we shouldn't override it and continue using the array size for
the fetchlimit.
-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-06 22:33   ` Thiago Jung Bauermann
@ 2009-04-09  9:58     ` Phil Muldoon
  2009-04-09 16:08       ` Thiago Jung Bauermann
  0 siblings, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-09  9:58 UTC (permalink / raw)
  To: Thiago Jung Bauermann; +Cc: Tom Tromey, Project Archer

Thiago Jung Bauermann wrote:
> El lun, 06-04-2009 a las 16:14 -0600, Tom Tromey escribió:
>   
>>>>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:
>>>>>>>               
>> The rest looks good to me.
>>     
>
> >From what we talked on IRC, in this block:
>
>       /* If we know the size of the array, we can use it as a limit on the
>          number of characters to be fetched.  */
>       if (TYPE_NFIELDS (type) == 1
>           && TYPE_CODE (TYPE_FIELD_TYPE (type, 0)) == TYPE_CODE_RANGE)
>
> we want to set *length and not fetchlimit from the array size. But
> probably only if *length is -1. If the caller specified a *length
> already, we shouldn't override it and continue using the array size for
> the fetchlimit.
>   
I'm not sure what benefit setting the length to arraysize in the case of 
length  -1 would achieve? Fetchlimit will be set to either unint_max or 
the actual bounds of the array, and -1 means return at first null or  
fetchlimit length within the fetchlimit?  So sending length of -1, and a 
fetchlimit as described in the current existing logic would "do the 
right thing"? If I were to set length to the array bound in the -1 case, 
it would fetch the whole array, and not stop on the first null (which we 
want for -1)?

Regards

Phil

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09  9:58     ` Phil Muldoon
@ 2009-04-09 16:08       ` Thiago Jung Bauermann
  2009-04-09 16:28         ` Phil Muldoon
  0 siblings, 1 reply; 17+ messages in thread
From: Thiago Jung Bauermann @ 2009-04-09 16:08 UTC (permalink / raw)
  To: Phil Muldoon; +Cc: Tom Tromey, Project Archer

El jue, 09-04-2009 a las 10:58 +0100, Phil Muldoon escribió:
> Thiago Jung Bauermann wrote:
> > >From what we talked on IRC, in this block:
> >
> >       /* If we know the size of the array, we can use it as a limit on the
> >          number of characters to be fetched.  */
> >       if (TYPE_NFIELDS (type) == 1
> >           && TYPE_CODE (TYPE_FIELD_TYPE (type, 0)) == TYPE_CODE_RANGE)
> >
> > we want to set *length and not fetchlimit from the array size. But
> > probably only if *length is -1. If the caller specified a *length
> > already, we shouldn't override it and continue using the array size for
> > the fetchlimit.
> >   
> I'm not sure what benefit setting the length to arraysize in the case of 
> length  -1 would achieve? Fetchlimit will be set to either unint_max or 
> the actual bounds of the array, and -1 means return at first null or  
> fetchlimit length within the fetchlimit?  So sending length of -1, and a 
> fetchlimit as described in the current existing logic would "do the 
> right thing"? If I were to set length to the array bound in the -1 case, 
> it would fetch the whole array, and not stop on the first null (which we 
> want for -1)?

If you call read_string with length = -1, it will stop at the first null
character, or when fetchlimit is reached. So the current code doesn't
fetch all of an array, it stops at the first null character. I believe
this is what we're trying to avoid?

So, in the case of an array with known size, if we want to fetch the
whole of it without paying attention to null characters inside it, we
should set length to its size.

I say we should do this only if length is -1 assuming that the caller
knows what it's doing when it says exactly how many characters it wants,
and we shouldn't try to be smart without being asked to. But perhaps I'm
wrong here, and we should always set length to the array size? (This is
only a theoretical worry though, I'm not thinking of a specific case
where the array length is known but the caller is asking for a specific
number of characters).

Did I clarify my thoughts, or just added more confusion? :-)
-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-06 22:14 ` Tom Tromey
  2009-04-06 22:33   ` Thiago Jung Bauermann
@ 2009-04-09 16:12   ` Phil Muldoon
  2009-04-09 20:39     ` Thiago Jung Bauermann
  1 sibling, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-09 16:12 UTC (permalink / raw)
  To: Tom Tromey; +Cc: Project Archer

[-- Attachment #1: Type: text/plain, Size: 342 bytes --]

Tom Tromey wrote:
>>>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:
>>>>>>             
>
> Phil> This patch adds an optional length parameter to the
> Phil> value.string() method.
>
> This needs a documentation change as well.
>
>
>   
(and other fixes)

I think I have implemented all of your requested changes.

Regards

Phil.



[-- Attachment #2: valpy_variable_string_fetch.patch --]
[-- Type: text/x-patch, Size: 10211 bytes --]

diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 8b5410f..d03a88f 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -183,16 +183,20 @@ c_printstr (struct ui_file *stream, const gdb_byte *string,
 }
 
 /* Obtain a C string from the inferior storing it in a newly allocated
-   buffer in BUFFER, which should be freed by the caller.  The string is
-   read until a null character is found. If VALUE is an array with known
-   length, the function will not read past the end of the array.  LENGTH
-   will contain the size of the string in bytes (not counting the null
-   character).
-
-   Assumes strings are terminated by a null character.  The size of a character
-   is determined by the length of the target type of the pointer or array.
-   This means that a null byte present in a multi-byte character will not
-   terminate the string unless the whole character is null.
+   buffer in BUFFER, which should be freed by the caller.  If length
+   is specified at -1, the string is read until a null character is
+   found, otherwise the string is read to the length specified.  
+   If VALUE is an array with a known length, the function will not
+   read past the end of the array.  LENGTH will contain the size of
+   the string in bytes.  (If a length of -1 is specified, the length
+   returned will not include the null character).
+
+   In the case of length specified as -1, assume strings are
+   terminated by a null character.  The size of a character is
+   determined by the length of the target type of the pointer or
+   array.  This means that a null byte present in a multi-byte
+   character will not terminate the string unless the whole character
+   is null.
 
    CHARSET is always set to the target charset.  */
 
@@ -204,6 +208,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
   unsigned int fetchlimit;
   struct type *type = check_typedef (value_type (value));
   struct type *element_type = TYPE_TARGET_TYPE (type);
+  int req_length = *length;
 
   if (element_type == NULL)
     goto error;
@@ -249,12 +254,17 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
       int i;
       const gdb_byte *contents = value_contents (value);
 
-      /* Look for a null character.  */
-      for (i = 0; i < fetchlimit; i++)
-	if (extract_unsigned_integer (contents + i * width, width) == 0)
-	  break;
-
-      /* I is now either the number of non-null characters, or FETCHLIMIT.  */
+      /* If a length is specified, use that.  */
+      if (*length >= 0)
+	i = *length;
+      else
+	/* Otherwise, look for a null character.  */
+	for (i = 0; i < fetchlimit; i++)
+	  if (extract_unsigned_integer (contents + i * width, width) == 0)
+	    break;
+      
+      /* I is now either a user-defined length, the number of non-null
+	 characters, or FETCHLIMIT.  */      
       *length = i * width;
       *buffer = xmalloc (*length);
       memcpy (*buffer, contents, *length);
@@ -262,7 +272,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
     }
   else
     {
-      err = read_string (value_as_address (value), -1, width, fetchlimit,
+      err = read_string (value_as_address (value), *length, width, fetchlimit,
 			 buffer, length);
       if (err)
 	{
@@ -272,10 +282,15 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
 	}
     }
 
-  /* If the last character is null, subtract it from LENGTH.  */
-  if (*length > 0
-      && extract_unsigned_integer (*buffer + *length - width, width) == 0)
-    *length -= width;
+  /* If the LENGTH is specified at -1, we want to return the string
+     length up to the terminating null character.  If an actual length
+     was specified, we want to return the length of exactly what was
+     read.  */
+  if (req_length == -1)
+    /* If the last character is null, subtract it from LENGTH.  */
+    if (*length > 0
+	&& extract_unsigned_integer (*buffer + *length - width, width) == 0)
+      *length -= width;
 
   *charset = target_charset ();
 
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 8509ecc..c7091a6 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -18442,7 +18442,7 @@ The result @code{bar} will be a @code{gdb.Value} object holding the
 value pointed to by @code{foo}.
 @end defmethod
 
-@defmethod Value string @r{[}encoding@r{]} @r{[}errors@r{]}
+@defmethod Value string @r{[}encoding@r{]} @r{[}errors@r{]} @r{[}length@r{]}
 If this @code{gdb.Value} represents a string, then this method
 converts the contents to a Python string.  Otherwise, this method will
 throw an exception.
@@ -18453,7 +18453,9 @@ language.
 
 For C-like languages, a value is a string if it is a pointer to or an
 array of characters or ints.  The string is assumed to be terminated
-by a zero of the appropriate width.
+by a zero of the appropriate width.  However if the optional length
+argument is given, the string will be converted beyond any embedded
+nulls up to the length specified.
 
 If the optional @var{encoding} argument is given, it must be a string
 naming the encoding of the string in the @code{gdb.Value}, such as
@@ -18467,6 +18469,9 @@ will be used, if the current language is able to supply one.
 
 The optional @var{errors} argument is the same as the corresponding
 argument to Python's @code{string.decode} method.
+
+If the optional @var{length} argumen is given, the string will be
+fetched and converted to the given length.
 @end defmethod
 @end table
 
diff --git a/gdb/language.h b/gdb/language.h
index 85826fd..97b93d6 100644
--- a/gdb/language.h
+++ b/gdb/language.h
@@ -283,10 +283,14 @@ struct language_defn
     int (*la_pass_by_reference) (struct type *type);
 
     /* Obtain a string from the inferior, storing it in a newly allocated
-       buffer in BUFFER, which should be freed by the caller.  LENGTH will
-       hold the size in bytes of the string (only actual characters, excluding
-       an eventual terminating null character).  CHARSET will hold the encoding
-       used in the string.  */
+       buffer in BUFFER, which should be freed by the caller.  If LENGTH
+       is specified at -1, the string is read until a null character is
+       found -  otherwise the string is read to the length specified.
+       On completion, LENGTH will hold the size in bytes of the string.
+       If a LENGTH of -1 was specified it will count only actual
+       characters, excluding any eventual terminating null character.
+       Otherwise LENGTH will equal all characters - including any nulls.
+       CHARSET will hold the encoding used in the string.  */
     void (*la_get_string) (struct value *value, gdb_byte **buffer, int *length,
 			  const char **charset);
 
diff --git a/gdb/python/python-value.c b/gdb/python/python-value.c
index 2eaf15f..3f0e114 100644
--- a/gdb/python/python-value.c
+++ b/gdb/python/python-value.c
@@ -196,7 +196,7 @@ valpy_get_type (PyObject *self, void *closure)
 static PyObject *
 valpy_string (PyObject *self, PyObject *args, PyObject *kw)
 {
-  int length, ret = 0;
+  int length = -1, ret = 0;
   gdb_byte *buffer;
   struct value *value = ((value_object *) self)->value;
   volatile struct gdb_exception except;
@@ -205,10 +205,10 @@ valpy_string (PyObject *self, PyObject *args, PyObject *kw)
   const char *errors = NULL;
   const char *user_encoding = NULL;
   const char *la_encoding = NULL;
-  static char *keywords[] = { "encoding", "errors" };
+  static char *keywords[] = { "encoding", "errors", "length" };
 
-  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ss", keywords,
-				    &user_encoding, &errors))
+  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ssi", keywords,
+				    &user_encoding, &errors, &length))
     return NULL;
 
   TRY_CATCH (except, RETURN_MASK_ALL)
@@ -975,7 +975,7 @@ static PyMethodDef value_object_methods[] = {
   { "cast", valpy_cast, METH_VARARGS, "Cast the value to the supplied type." },
   { "dereference", valpy_dereference, METH_NOARGS, "Dereferences the value." },
   { "string", (PyCFunction) valpy_string, METH_VARARGS | METH_KEYWORDS,
-    "string ([encoding] [, errors]) -> string\n\
+    "string ([encoding] [, errors] [, length]) -> string\n\
 Return Unicode string representation of the value." },
   {NULL}  /* Sentinel */
 };
diff --git a/gdb/testsuite/gdb.python/python-value.c b/gdb/testsuite/gdb.python/python-value.c
index 092c520..f3d6284 100644
--- a/gdb/testsuite/gdb.python/python-value.c
+++ b/gdb/testsuite/gdb.python/python-value.c
@@ -44,6 +44,8 @@ main (int argc, char *argv[])
   struct s s;
   union u u;
   PTR x = &s;
+  char st[17] = "divide et impera";
+  char nullst[17] = "divide\0et\0impera";
 
   s.a = 3;
   s.b = 5;
diff --git a/gdb/testsuite/gdb.python/python-value.exp b/gdb/testsuite/gdb.python/python-value.exp
index 3421406..e847c98 100644
--- a/gdb/testsuite/gdb.python/python-value.exp
+++ b/gdb/testsuite/gdb.python/python-value.exp
@@ -234,6 +234,23 @@ proc test_value_in_inferior {} {
 
   # Test address attribute
   gdb_test "python print 'result =', arg0.address" "= 0x\[\[:xdigit:\]\]+" "Test address attribute"
+
+  # Test string fetches,  both partial and whole.
+  gdb_test "print st" "\"divide et impera\""
+  gdb_py_test_silent_cmd "python st = gdb.history (0)" "get value from history" 1
+  gdb_test "python print st.string ()"  "divide et impera"  "Test string with no length"
+  gdb_test "python print st.string (length = -1)" "divide et impera" "Test string (length = -1) is all of the string"
+  gdb_test "python print st.string (length = 6)" "divide"
+  gdb_test "python print st.string (length = 0)" "" "Test string (length = 0) is empty"
+
+  # Fetch a string that has embedded nulls.
+  gdb_test "print nullst" "\"divide\\\\000et\\\\000impera\".*"
+  gdb_py_test_silent_cmd "python nullst = gdb.history (0)" "get value from history" 1
+  gdb_test "python print nullst.string ()" "divide" "Test string to first null"
+  # Python cannot print strings that contain the null (\0) character.
+  # For the purposes of this test, use repr()
+  gdb_py_test_silent_cmd "python nullst = nullst.string (length = 9)" "get string beyond null" 1
+  gdb_test "python print repr(nullst)" "u'divide\\\\x00et'"
 }
 
 proc test_value_after_death {} {

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09 16:08       ` Thiago Jung Bauermann
@ 2009-04-09 16:28         ` Phil Muldoon
  2009-04-09 18:46           ` Thiago Jung Bauermann
  0 siblings, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-09 16:28 UTC (permalink / raw)
  To: Thiago Jung Bauermann; +Cc: Tom Tromey, Project Archer

Thiago Jung Bauermann wrote:
> El jue, 09-04-2009 a las 10:58 +0100, Phil Muldoon escribió:
>   
>> Thiago Jung Bauermann wrote:
>>     
> If you call read_string with length = -1, it will stop at the first null
> character, or when fetchlimit is reached. So the current code doesn't
> fetch all of an array, it stops at the first null character. I believe
> this is what we're trying to avoid?
>   

Aha I see what you mean now. I wrote the patch to follow this logic:

1) If a length of -1 is specified, read until first null character. This 
is what read_string does, and seems sane in the C string sense. The 
default parameter passed to LA_GET_STRING in value.string() is -1, and 
so the current behaviour just fetches a string up to the first null  for 
c strings.

2) If a length is specified, ignore any embedded nulls - just fetch 
until length is satisfied, or fetchlimit is reached.

So:

char foo[50]  ="george";
char bar[50] = "george\0mildred";
char *foobar = "bungle";

With the patch (I just sent Tom's requested changes back to him) the 
behaviour is:

so just string() in the foo variable would return "george", length 6.
string() in the bar variable would return "george", length 6
string(length = 15) in the bar variable would override the null 
termination and return "george\0mildred"
string() in the foobar variable would return "bungle", but string(length 
= 500), would return "bungle" + trash.

With Tom's patch to the python string pretty printer (in C++) we can 
determine length.

> So, in the case of an array with known size, if we want to fetch the
> whole of it without paying attention to null characters inside it, we
> should set length to its size.
>   

I'm not adverse to this if this is what is the most sensible thing to 
do. I guess what is the sanest default?

Regards

Phil

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09 16:28         ` Phil Muldoon
@ 2009-04-09 18:46           ` Thiago Jung Bauermann
  2009-04-09 20:28             ` Phil Muldoon
  0 siblings, 1 reply; 17+ messages in thread
From: Thiago Jung Bauermann @ 2009-04-09 18:46 UTC (permalink / raw)
  To: Phil Muldoon; +Cc: Tom Tromey, Project Archer

El jue, 09-04-2009 a las 17:28 +0100, Phil Muldoon escribió:
> Thiago Jung Bauermann wrote:
> > If you call read_string with length = -1, it will stop at the first null
> > character, or when fetchlimit is reached. So the current code doesn't
> > fetch all of an array, it stops at the first null character. I believe
> > this is what we're trying to avoid?
> 
> Aha I see what you mean now. I wrote the patch to follow this logic:
> 
> 1) If a length of -1 is specified, read until first null character. This 
> is what read_string does, and seems sane in the C string sense. The 
> default parameter passed to LA_GET_STRING in value.string() is -1, and 
> so the current behaviour just fetches a string up to the first null  for 
> c strings.

Nice.

> 2) If a length is specified, ignore any embedded nulls - just fetch 
> until length is satisfied, or fetchlimit is reached.
> 
> So:
> 
> char foo[50]  ="george";
> char bar[50] = "george\0mildred";
> char *foobar = "bungle";
> 
> With the patch (I just sent Tom's requested changes back to him) the 
> behaviour is:
> 
> so just string() in the foo variable would return "george", length 6.
> string() in the bar variable would return "george", length 6
> string(length = 15) in the bar variable would override the null 
> termination and return "george\0mildred"
> string() in the foobar variable would return "bungle", but string(length 
> = 500), would return "bungle" + trash.

foobar is an interesting example. It is both a well-behaved
null-terminated string, and (AFAIK) an array with known size (7 bytes).

But your code will work correctly for it, no matter what default we
choose for the array-wit-known-size case, so it's not really important.

> With Tom's patch to the python string pretty printer (in C++) we can 
> determine length.
> 
> > So, in the case of an array with known size, if we want to fetch the
> > whole of it without paying attention to null characters inside it, we
> > should set length to its size.  
> 
> I'm not adverse to this if this is what is the most sensible thing to 
> do. I guess what is the sanest default?

I don't think it's clear what the default should be. In your examples
above, do we want string() to return "george", or "george\0mildred" + 35
bytes of garbage? Tough.
-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09 18:46           ` Thiago Jung Bauermann
@ 2009-04-09 20:28             ` Phil Muldoon
  2009-04-09 20:39               ` Thiago Jung Bauermann
  0 siblings, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-09 20:28 UTC (permalink / raw)
  To: Thiago Jung Bauermann; +Cc: Tom Tromey, Project Archer

Thiago Jung Bauermann wrote:
>>     
>>> So, in the case of an array with known size, if we want to fetch the
>>> whole of it without paying attention to null characters inside it, we
>>> should set length to its size.  
>>>       
>> I'm not adverse to this if this is what is the most sensible thing to 
>> do. I guess what is the sanest default?
>>     
>
> I don't think it's clear what the default should be. In your examples
> above, do we want string() to return "george", or "george\0mildred" + 35
> bytes of garbage? Tough.
>   


I elected in the default case just to revert to existing behaviour for C 
strings. Terminate them on the first null regardless of the physical 
bounds of the array. And to provide the length option to override null 
detection.

It is tough choice, and I wonder if we'll run into similar issues with 
strings with other languages down the line.

Regards

Phil


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09 16:12   ` Phil Muldoon
@ 2009-04-09 20:39     ` Thiago Jung Bauermann
  2009-04-09 21:29       ` Phil Muldoon
  0 siblings, 1 reply; 17+ messages in thread
From: Thiago Jung Bauermann @ 2009-04-09 20:39 UTC (permalink / raw)
  To: Phil Muldoon; +Cc: Tom Tromey, Project Archer

El jue, 09-04-2009 a las 17:12 +0100, Phil Muldoon escribió:
> I think I have implemented all of your requested changes.

I looked at the patch and it seems good. I have just a few nits, I hope
you will put up with me just a tad bit more. :-)

>  static PyObject *
>  valpy_string (PyObject *self, PyObject *args, PyObject *kw)
>  {

This function has the following comment above it:

/* Implementation of gdb.Value.string ([encoding] [, errors]) -> string
   Return Unicode string with value contents.  If ENCODING is not given,
   the string is assumed to be encoded in the target's charset.  */

Would you mind adding "[, length] to the prototype?

> +  gdb_test "python print st.string (length = 0)" "" "Test string (length = 0) is empty"

Does using "" as test pattern really test for an empty line? I know that
gdb_test puts an EOL marker right after the pattern, but it doesn't put
any required pattern before the pattern...
-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09 20:28             ` Phil Muldoon
@ 2009-04-09 20:39               ` Thiago Jung Bauermann
  0 siblings, 0 replies; 17+ messages in thread
From: Thiago Jung Bauermann @ 2009-04-09 20:39 UTC (permalink / raw)
  To: Phil Muldoon; +Cc: Tom Tromey, Project Archer

El jue, 09-04-2009 a las 21:27 +0100, Phil Muldoon escribió:
> I elected in the default case just to revert to existing behaviour for C 
> strings. Terminate them on the first null regardless of the physical 
> bounds of the array. And to provide the length option to override null 
> detection.

I'm fine with that. Thanks for looking into this issue.

> It is tough choice, and I wonder if we'll run into similar issues with 
> strings with other languages down the line.

Right. I hope we won't have to make API-incompatible changes to
accomodate them! But I think not...
-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09 20:39     ` Thiago Jung Bauermann
@ 2009-04-09 21:29       ` Phil Muldoon
  2009-04-13 22:08         ` Tom Tromey
  0 siblings, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-09 21:29 UTC (permalink / raw)
  To: Thiago Jung Bauermann; +Cc: Tom Tromey, Project Archer

[-- Attachment #1: Type: text/plain, Size: 1045 bytes --]

Thiago Jung Bauermann wrote:
> I looked at the patch and it seems good. I have just a few nits, I hope
> you will put up with me just a tad bit more. :-)
>
>   

Thanks for the swift review.

>>  static PyObject *
>>  valpy_string (PyObject *self, PyObject *args, PyObject *kw)
>>  {
>>     
>
> This function has the following comment above it:
>
> /* Implementation of gdb.Value.string ([encoding] [, errors]) -> string
>    Return Unicode string with value contents.  If ENCODING is not given,
>    the string is assumed to be encoded in the target's charset.  */
>
> Would you mind adding "[, length] to the prototype?
>   

Ack, sorry.  Added.

>> +  gdb_test "python print st.string (length = 0)" "" "Test string (length = 0) is empty"
>>     
>
> Does using "" as test pattern really test for an empty line? I know that
> gdb_test puts an EOL marker right after the pattern, but it doesn't put
> any required pattern before the pattern...
>   

Altered the test. Thanks for the tips on irc on doing this.

Patch attached.

Regards

Phil


[-- Attachment #2: valpy_variable_string_fetch.patch --]
[-- Type: text/x-patch, Size: 10862 bytes --]

diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 8b5410f..d03a88f 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -183,16 +183,20 @@ c_printstr (struct ui_file *stream, const gdb_byte *string,
 }
 
 /* Obtain a C string from the inferior storing it in a newly allocated
-   buffer in BUFFER, which should be freed by the caller.  The string is
-   read until a null character is found. If VALUE is an array with known
-   length, the function will not read past the end of the array.  LENGTH
-   will contain the size of the string in bytes (not counting the null
-   character).
-
-   Assumes strings are terminated by a null character.  The size of a character
-   is determined by the length of the target type of the pointer or array.
-   This means that a null byte present in a multi-byte character will not
-   terminate the string unless the whole character is null.
+   buffer in BUFFER, which should be freed by the caller.  If length
+   is specified at -1, the string is read until a null character is
+   found, otherwise the string is read to the length specified.  
+   If VALUE is an array with a known length, the function will not
+   read past the end of the array.  LENGTH will contain the size of
+   the string in bytes.  (If a length of -1 is specified, the length
+   returned will not include the null character).
+
+   In the case of length specified as -1, assume strings are
+   terminated by a null character.  The size of a character is
+   determined by the length of the target type of the pointer or
+   array.  This means that a null byte present in a multi-byte
+   character will not terminate the string unless the whole character
+   is null.
 
    CHARSET is always set to the target charset.  */
 
@@ -204,6 +208,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
   unsigned int fetchlimit;
   struct type *type = check_typedef (value_type (value));
   struct type *element_type = TYPE_TARGET_TYPE (type);
+  int req_length = *length;
 
   if (element_type == NULL)
     goto error;
@@ -249,12 +254,17 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
       int i;
       const gdb_byte *contents = value_contents (value);
 
-      /* Look for a null character.  */
-      for (i = 0; i < fetchlimit; i++)
-	if (extract_unsigned_integer (contents + i * width, width) == 0)
-	  break;
-
-      /* I is now either the number of non-null characters, or FETCHLIMIT.  */
+      /* If a length is specified, use that.  */
+      if (*length >= 0)
+	i = *length;
+      else
+	/* Otherwise, look for a null character.  */
+	for (i = 0; i < fetchlimit; i++)
+	  if (extract_unsigned_integer (contents + i * width, width) == 0)
+	    break;
+      
+      /* I is now either a user-defined length, the number of non-null
+	 characters, or FETCHLIMIT.  */      
       *length = i * width;
       *buffer = xmalloc (*length);
       memcpy (*buffer, contents, *length);
@@ -262,7 +272,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
     }
   else
     {
-      err = read_string (value_as_address (value), -1, width, fetchlimit,
+      err = read_string (value_as_address (value), *length, width, fetchlimit,
 			 buffer, length);
       if (err)
 	{
@@ -272,10 +282,15 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
 	}
     }
 
-  /* If the last character is null, subtract it from LENGTH.  */
-  if (*length > 0
-      && extract_unsigned_integer (*buffer + *length - width, width) == 0)
-    *length -= width;
+  /* If the LENGTH is specified at -1, we want to return the string
+     length up to the terminating null character.  If an actual length
+     was specified, we want to return the length of exactly what was
+     read.  */
+  if (req_length == -1)
+    /* If the last character is null, subtract it from LENGTH.  */
+    if (*length > 0
+	&& extract_unsigned_integer (*buffer + *length - width, width) == 0)
+      *length -= width;
 
   *charset = target_charset ();
 
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 8509ecc..c7091a6 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -18442,7 +18442,7 @@ The result @code{bar} will be a @code{gdb.Value} object holding the
 value pointed to by @code{foo}.
 @end defmethod
 
-@defmethod Value string @r{[}encoding@r{]} @r{[}errors@r{]}
+@defmethod Value string @r{[}encoding@r{]} @r{[}errors@r{]} @r{[}length@r{]}
 If this @code{gdb.Value} represents a string, then this method
 converts the contents to a Python string.  Otherwise, this method will
 throw an exception.
@@ -18453,7 +18453,9 @@ language.
 
 For C-like languages, a value is a string if it is a pointer to or an
 array of characters or ints.  The string is assumed to be terminated
-by a zero of the appropriate width.
+by a zero of the appropriate width.  However if the optional length
+argument is given, the string will be converted beyond any embedded
+nulls up to the length specified.
 
 If the optional @var{encoding} argument is given, it must be a string
 naming the encoding of the string in the @code{gdb.Value}, such as
@@ -18467,6 +18469,9 @@ will be used, if the current language is able to supply one.
 
 The optional @var{errors} argument is the same as the corresponding
 argument to Python's @code{string.decode} method.
+
+If the optional @var{length} argumen is given, the string will be
+fetched and converted to the given length.
 @end defmethod
 @end table
 
diff --git a/gdb/language.h b/gdb/language.h
index 85826fd..97b93d6 100644
--- a/gdb/language.h
+++ b/gdb/language.h
@@ -283,10 +283,14 @@ struct language_defn
     int (*la_pass_by_reference) (struct type *type);
 
     /* Obtain a string from the inferior, storing it in a newly allocated
-       buffer in BUFFER, which should be freed by the caller.  LENGTH will
-       hold the size in bytes of the string (only actual characters, excluding
-       an eventual terminating null character).  CHARSET will hold the encoding
-       used in the string.  */
+       buffer in BUFFER, which should be freed by the caller.  If LENGTH
+       is specified at -1, the string is read until a null character is
+       found -  otherwise the string is read to the length specified.
+       On completion, LENGTH will hold the size in bytes of the string.
+       If a LENGTH of -1 was specified it will count only actual
+       characters, excluding any eventual terminating null character.
+       Otherwise LENGTH will equal all characters - including any nulls.
+       CHARSET will hold the encoding used in the string.  */
     void (*la_get_string) (struct value *value, gdb_byte **buffer, int *length,
 			  const char **charset);
 
diff --git a/gdb/python/python-value.c b/gdb/python/python-value.c
index 2eaf15f..743e6a6 100644
--- a/gdb/python/python-value.c
+++ b/gdb/python/python-value.c
@@ -190,13 +190,16 @@ valpy_get_type (PyObject *self, void *closure)
   return obj->type;
 }
 
-/* Implementation of gdb.Value.string ([encoding] [, errors]) -> string
-   Return Unicode string with value contents.  If ENCODING is not given,
-   the string is assumed to be encoded in the target's charset.  */
+/* Implementation of gdb.Value.string ([encoding] [, errors] 
+   [, length]) -> string.  Return Unicode string with value contents.
+   If ENCODING is not given, the string is assumed to be encoded in
+   the target's charset.  If LENGTH is provided, only fetch string to
+   the length provided.  */
+
 static PyObject *
 valpy_string (PyObject *self, PyObject *args, PyObject *kw)
 {
-  int length, ret = 0;
+  int length = -1, ret = 0;
   gdb_byte *buffer;
   struct value *value = ((value_object *) self)->value;
   volatile struct gdb_exception except;
@@ -205,10 +208,10 @@ valpy_string (PyObject *self, PyObject *args, PyObject *kw)
   const char *errors = NULL;
   const char *user_encoding = NULL;
   const char *la_encoding = NULL;
-  static char *keywords[] = { "encoding", "errors" };
+  static char *keywords[] = { "encoding", "errors", "length" };
 
-  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ss", keywords,
-				    &user_encoding, &errors))
+  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ssi", keywords,
+				    &user_encoding, &errors, &length))
     return NULL;
 
   TRY_CATCH (except, RETURN_MASK_ALL)
@@ -975,7 +978,7 @@ static PyMethodDef value_object_methods[] = {
   { "cast", valpy_cast, METH_VARARGS, "Cast the value to the supplied type." },
   { "dereference", valpy_dereference, METH_NOARGS, "Dereferences the value." },
   { "string", (PyCFunction) valpy_string, METH_VARARGS | METH_KEYWORDS,
-    "string ([encoding] [, errors]) -> string\n\
+    "string ([encoding] [, errors] [, length]) -> string\n\
 Return Unicode string representation of the value." },
   {NULL}  /* Sentinel */
 };
diff --git a/gdb/testsuite/gdb.python/python-value.c b/gdb/testsuite/gdb.python/python-value.c
index 092c520..f3d6284 100644
--- a/gdb/testsuite/gdb.python/python-value.c
+++ b/gdb/testsuite/gdb.python/python-value.c
@@ -44,6 +44,8 @@ main (int argc, char *argv[])
   struct s s;
   union u u;
   PTR x = &s;
+  char st[17] = "divide et impera";
+  char nullst[17] = "divide\0et\0impera";
 
   s.a = 3;
   s.b = 5;
diff --git a/gdb/testsuite/gdb.python/python-value.exp b/gdb/testsuite/gdb.python/python-value.exp
index 3421406..59d13c6 100644
--- a/gdb/testsuite/gdb.python/python-value.exp
+++ b/gdb/testsuite/gdb.python/python-value.exp
@@ -234,6 +234,25 @@ proc test_value_in_inferior {} {
 
   # Test address attribute
   gdb_test "python print 'result =', arg0.address" "= 0x\[\[:xdigit:\]\]+" "Test address attribute"
+
+  # Test string fetches,  both partial and whole.
+  gdb_test "print st" "\"divide et impera\""
+  gdb_py_test_silent_cmd "python st = gdb.history (0)" "get value from history" 1
+  gdb_test "python print st.string ()"  "divide et impera"  "Test string with no length"
+  gdb_test "python print st.string (length = -1)" "divide et impera" "Test string (length = -1) is all of the string"
+  gdb_test "python print st.string (length = 6)" "divide"
+  gdb_test "python print \"---\"+st.string (length = 0)+\"---\"" "------" "Test string (length = 0) is empty"
+  gdb_test "python print len(st.string (length = 0))" "0" "Test length is 0"
+
+
+  # Fetch a string that has embedded nulls.
+  gdb_test "print nullst" "\"divide\\\\000et\\\\000impera\".*"
+  gdb_py_test_silent_cmd "python nullst = gdb.history (0)" "get value from history" 1
+  gdb_test "python print nullst.string ()" "divide" "Test string to first null"
+  # Python cannot print strings that contain the null (\0) character.
+  # For the purposes of this test, use repr()
+  gdb_py_test_silent_cmd "python nullst = nullst.string (length = 9)" "get string beyond null" 1
+  gdb_test "python print repr(nullst)" "u'divide\\\\x00et'"
 }
 
 proc test_value_after_death {} {

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-09 21:29       ` Phil Muldoon
@ 2009-04-13 22:08         ` Tom Tromey
  2009-04-15  9:13           ` Phil Muldoon
  2009-04-15 14:52           ` Phil Muldoon
  0 siblings, 2 replies; 17+ messages in thread
From: Tom Tromey @ 2009-04-13 22:08 UTC (permalink / raw)
  To: Phil Muldoon; +Cc: Thiago Jung Bauermann, Project Archer

>>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:

Phil> +   buffer in BUFFER, which should be freed by the caller.  If length
Phil> +   is specified at -1, the string is read until a null character is
Phil> +   found, otherwise the string is read to the length specified.  

This should mention that length is in units of characters.

And, it seems weird that on input length is characters and on output
it is bytes.  Please change this to be characters uniformly.

Phil> +   In the case of length specified as -1, assume strings are
Phil> +   terminated by a null character.  The size of a character is
Phil> +   determined by the length of the target type of the pointer or
Phil> +   array.  This means that a null byte present in a multi-byte
Phil> +   character will not terminate the string unless the whole character
Phil> +   is null.

Delete the last sentence and just add "of the appropriate width" to
the end of the first sentence.  I think that is a bit clearer.

Phil> +If the optional @var{length} argumen is given, the string will be

Typo, should be "argument".

Phil> +       Otherwise LENGTH will equal all characters - including any nulls.

I think change "equal" to "include" here.

Tom

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-13 22:08         ` Tom Tromey
@ 2009-04-15  9:13           ` Phil Muldoon
  2009-04-15  9:37             ` Phil Muldoon
  2009-04-15 14:52           ` Phil Muldoon
  1 sibling, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-15  9:13 UTC (permalink / raw)
  To: Tom Tromey; +Cc: Thiago Jung Bauermann, Project Archer

Tom Tromey wrote:
>>>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:
>>>>>>             
>
> Phil> +   buffer in BUFFER, which should be freed by the caller.  If length
> Phil> +   is specified at -1, the string is read until a null character is
> Phil> +   found, otherwise the string is read to the length specified.  
>
> This should mention that length is in units of characters.
>
> And, it seems weird that on input length is characters and on output
> it is bytes.  Please change this to be characters uniformly.
>   

Yeah I was naively replicating the behavior from read_string which 
expects a character length and returns bytes read; but you are right it 
is totally the wrong semantics to use in this case. Thanks for spotting 
this. In this case is multiplying the number of bytes read as returned 
by read_string, and multiplying it by the width the right thing to 
return here?


Regards

Phil

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-15  9:13           ` Phil Muldoon
@ 2009-04-15  9:37             ` Phil Muldoon
  2009-04-15 12:37               ` Tom Tromey
  0 siblings, 1 reply; 17+ messages in thread
From: Phil Muldoon @ 2009-04-15  9:37 UTC (permalink / raw)
  To: Tom Tromey; +Cc: Thiago Jung Bauermann, Project Archer

Phil Muldoon wrote:
> Tom Tromey wrote:
>>>>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:
>>
>> Phil> + buffer in BUFFER, which should be freed by the caller. If length
>> Phil> + is specified at -1, the string is read until a null character is
>> Phil> + found, otherwise the string is read to the length specified.
>> This should mention that length is in units of characters.
>>
>> And, it seems weird that on input length is characters and on output
>> it is bytes. Please change this to be characters uniformly.
>
> Yeah I was naively replicating the behavior from read_string which 
> expects a character length and returns bytes read; but you are right 
> it is totally the wrong semantics to use in this case. Thanks for 
> spotting this. In this case is multiplying the number of bytes read as 
> returned by read_string, and multiplying it by the width the right 
> thing to return here?

s/multiplying/dividing

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-15  9:37             ` Phil Muldoon
@ 2009-04-15 12:37               ` Tom Tromey
  0 siblings, 0 replies; 17+ messages in thread
From: Tom Tromey @ 2009-04-15 12:37 UTC (permalink / raw)
  To: Phil Muldoon; +Cc: Thiago Jung Bauermann, Project Archer

>>>>> "Phil" == Phil Muldoon <pmuldoon@redhat.com> writes:

>> In this case is multiplying the number of bytes read
>> as returned by read_string, and multiplying it by the width the
>> right thing to return here?

Phil> s/multiplying/dividing

Yes.

Tom

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [python][patch] Add options length parameter to value.string(...)
  2009-04-13 22:08         ` Tom Tromey
  2009-04-15  9:13           ` Phil Muldoon
@ 2009-04-15 14:52           ` Phil Muldoon
  1 sibling, 0 replies; 17+ messages in thread
From: Phil Muldoon @ 2009-04-15 14:52 UTC (permalink / raw)
  To: Tom Tromey; +Cc: Thiago Jung Bauermann, Project Archer

[-- Attachment #1: Type: text/plain, Size: 1535 bytes --]

Tom Tromey wrote:
> Phil> +   buffer in BUFFER, which should be freed by the caller.  If length
> Phil> +   is specified at -1, the string is read until a null character is
> Phil> +   found, otherwise the string is read to the length specified.  
>
> This should mention that length is in units of characters.
>
> And, it seems weird that on input length is characters and on output
> it is bytes.  Please change this to be characters uniformly.
>
> Phil> +   In the case of length specified as -1, assume strings are
> Phil> +   terminated by a null character.  The size of a character is
> Phil> +   determined by the length of the target type of the pointer or
> Phil> +   array.  This means that a null byte present in a multi-byte
> Phil> +   character will not terminate the string unless the whole character
> Phil> +   is null.
>
> Delete the last sentence and just add "of the appropriate width" to
> the end of the first sentence.  I think that is a bit clearer.
>
> Phil> +If the optional @var{length} argumen is given, the string will be
>
> Typo, should be "argument".
>
> Phil> +       Otherwise LENGTH will equal all characters - including any nulls.
>
> I think change "equal" to "include" here.
>   


I think I have encapsulated all of the requests to the comments. I 
decided to rewrite the comments a little more substantially than the 
requests to try and improve clarity. I also divided the number of bytes 
returned by read_string by the width of the character to return the 
number of characters read.

Phil



[-- Attachment #2: valpy_variable_string_fetch.patch --]
[-- Type: text/x-patch, Size: 11610 bytes --]

diff --git a/gdb/c-lang.c b/gdb/c-lang.c
index 198c088..ac1f98c 100644
--- a/gdb/c-lang.c
+++ b/gdb/c-lang.c
@@ -183,18 +183,17 @@ c_printstr (struct ui_file *stream, const gdb_byte *string,
 }
 
 /* Obtain a C string from the inferior storing it in a newly allocated
-   buffer in BUFFER, which should be freed by the caller.  The string is
-   read until a null character is found. If VALUE is an array with known
-   length, the function will not read past the end of the array.  LENGTH
-   will contain the size of the string in bytes (not counting the null
-   character).
-
-   Assumes strings are terminated by a null character.  The size of a character
-   is determined by the length of the target type of the pointer or array.
-   This means that a null byte present in a multi-byte character will not
-   terminate the string unless the whole character is null.
-
-   CHARSET is always set to the target charset.  */
+   buffer in BUFFER, which should be freed by the caller.  If LENGTH
+   is specified at -1, the string is read until a null character of
+   the appropriate width is found, otherwise the string is read to the
+   length of characters specified.  The size of a character is
+   determined by the length of the target type of the pointer or
+   array.  If VALUE is an array with a known length, the function will
+   not read past the end of the array.  
+   On completion, LENGTH will be set to the size of the string read in
+   characters.  (If a length of -1 is specified, the length returned
+   will not include the null character).  CHARSET is always set to the
+   target charset.  */
 
 void
 c_get_string (struct value *value, gdb_byte **buffer, int *length,
@@ -204,6 +203,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
   unsigned int fetchlimit;
   struct type *type = check_typedef (value_type (value));
   struct type *element_type = TYPE_TARGET_TYPE (type);
+  int req_length = *length;
 
   if (element_type == NULL)
     goto error;
@@ -239,7 +239,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
 
   width = TYPE_LENGTH (element_type);
 
-  /* If the string lives in GDB's memory intead of the inferior's, then we
+  /* If the string lives in GDB's memory instead of the inferior's, then we
      just need to copy it to BUFFER.  Also, since such strings are arrays
      with known size, FETCHLIMIT will hold the size of the array.  */
   if ((VALUE_LVAL (value) == not_lval
@@ -249,12 +249,17 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
       int i;
       const gdb_byte *contents = value_contents (value);
 
-      /* Look for a null character.  */
-      for (i = 0; i < fetchlimit; i++)
-	if (extract_unsigned_integer (contents + i * width, width) == 0)
-	  break;
-
-      /* I is now either the number of non-null characters, or FETCHLIMIT.  */
+      /* If a length is specified, use that.  */
+      if (*length >= 0)
+	i = *length;
+      else
+	/* Otherwise, look for a null character.  */
+	for (i = 0; i < fetchlimit; i++)
+	  if (extract_unsigned_integer (contents + i * width, width) == 0)
+	    break;
+      
+      /* I is now either a user-defined length, the number of non-null
+	 characters, or FETCHLIMIT.  */      
       *length = i * width;
       *buffer = xmalloc (*length);
       memcpy (*buffer, contents, *length);
@@ -262,7 +267,7 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
     }
   else
     {
-      err = read_string (value_as_address (value), -1, width, fetchlimit,
+      err = read_string (value_as_address (value), *length, width, fetchlimit,
 			 buffer, length);
       if (err)
 	{
@@ -272,10 +277,21 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length,
 	}
     }
 
-  /* If the last character is null, subtract it from LENGTH.  */
-  if (*length > 0
-      && extract_unsigned_integer (*buffer + *length - width, width) == 0)
-    *length -= width;
+  /* If the LENGTH is specified at -1, we want to return the string
+     length up to the terminating null character.  If an actual length
+     was specified, we want to return the length of exactly what was
+     read.  */
+  if (req_length == -1)
+    /* If the last character is null, subtract it from LENGTH.  */
+    if (*length > 0
+	&& extract_unsigned_integer (*buffer + *length - width, width) == 0)
+      *length -= width;
+
+  /* The read_string function will return the number of bytes read.
+     If length returned from read_string was > 0, return the number of
+     characters read by dividing the number of bytes by width.  */
+  if (*length != 0)
+    *length = *length / width;
 
   *charset = target_charset ();
 
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index da0fece..958a74f 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -18411,7 +18411,7 @@ The result @code{bar} will be a @code{gdb.Value} object holding the
 value pointed to by @code{foo}.
 @end defmethod
 
-@defmethod Value string @r{[}encoding@r{]} @r{[}errors@r{]}
+@defmethod Value string @r{[}encoding@r{]} @r{[}errors@r{]} @r{[}length@r{]}
 If this @code{gdb.Value} represents a string, then this method
 converts the contents to a Python string.  Otherwise, this method will
 throw an exception.
@@ -18422,7 +18422,9 @@ language.
 
 For C-like languages, a value is a string if it is a pointer to or an
 array of characters or ints.  The string is assumed to be terminated
-by a zero of the appropriate width.
+by a zero of the appropriate width.  However if the optional length
+argument is given, the string will be converted beyond any embedded
+nulls up to the length specified.
 
 If the optional @var{encoding} argument is given, it must be a string
 naming the encoding of the string in the @code{gdb.Value}, such as
@@ -18436,6 +18438,9 @@ will be used, if the current language is able to supply one.
 
 The optional @var{errors} argument is the same as the corresponding
 argument to Python's @code{string.decode} method.
+
+If the optional @var{length} argument is given, the string will be
+fetched and converted to the given length.
 @end defmethod
 @end table
 
diff --git a/gdb/language.h b/gdb/language.h
index 85826fd..d2d6bdb 100644
--- a/gdb/language.h
+++ b/gdb/language.h
@@ -283,10 +283,15 @@ struct language_defn
     int (*la_pass_by_reference) (struct type *type);
 
     /* Obtain a string from the inferior, storing it in a newly allocated
-       buffer in BUFFER, which should be freed by the caller.  LENGTH will
-       hold the size in bytes of the string (only actual characters, excluding
-       an eventual terminating null character).  CHARSET will hold the encoding
-       used in the string.  */
+       buffer in BUFFER, which should be freed by the caller.  If LENGTH
+       is specified at -1, the string is read until a null character
+       of the appropriate width is found -  otherwise the string is
+       read to the length of characters specified.
+       On completion, LENGTH will hold the size of the string in characters.
+       If a LENGTH of -1 was specified it will count only actual
+       characters, excluding any eventual terminating null character.
+       Otherwise LENGTH will include all characters - including any nulls.
+       CHARSET will hold the encoding used in the string.  */
     void (*la_get_string) (struct value *value, gdb_byte **buffer, int *length,
 			  const char **charset);
 
diff --git a/gdb/python/python-value.c b/gdb/python/python-value.c
index 2eaf15f..743e6a6 100644
--- a/gdb/python/python-value.c
+++ b/gdb/python/python-value.c
@@ -190,13 +190,16 @@ valpy_get_type (PyObject *self, void *closure)
   return obj->type;
 }
 
-/* Implementation of gdb.Value.string ([encoding] [, errors]) -> string
-   Return Unicode string with value contents.  If ENCODING is not given,
-   the string is assumed to be encoded in the target's charset.  */
+/* Implementation of gdb.Value.string ([encoding] [, errors] 
+   [, length]) -> string.  Return Unicode string with value contents.
+   If ENCODING is not given, the string is assumed to be encoded in
+   the target's charset.  If LENGTH is provided, only fetch string to
+   the length provided.  */
+
 static PyObject *
 valpy_string (PyObject *self, PyObject *args, PyObject *kw)
 {
-  int length, ret = 0;
+  int length = -1, ret = 0;
   gdb_byte *buffer;
   struct value *value = ((value_object *) self)->value;
   volatile struct gdb_exception except;
@@ -205,10 +208,10 @@ valpy_string (PyObject *self, PyObject *args, PyObject *kw)
   const char *errors = NULL;
   const char *user_encoding = NULL;
   const char *la_encoding = NULL;
-  static char *keywords[] = { "encoding", "errors" };
+  static char *keywords[] = { "encoding", "errors", "length" };
 
-  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ss", keywords,
-				    &user_encoding, &errors))
+  if (!PyArg_ParseTupleAndKeywords (args, kw, "|ssi", keywords,
+				    &user_encoding, &errors, &length))
     return NULL;
 
   TRY_CATCH (except, RETURN_MASK_ALL)
@@ -975,7 +978,7 @@ static PyMethodDef value_object_methods[] = {
   { "cast", valpy_cast, METH_VARARGS, "Cast the value to the supplied type." },
   { "dereference", valpy_dereference, METH_NOARGS, "Dereferences the value." },
   { "string", (PyCFunction) valpy_string, METH_VARARGS | METH_KEYWORDS,
-    "string ([encoding] [, errors]) -> string\n\
+    "string ([encoding] [, errors] [, length]) -> string\n\
 Return Unicode string representation of the value." },
   {NULL}  /* Sentinel */
 };
diff --git a/gdb/testsuite/gdb.python/python-value.c b/gdb/testsuite/gdb.python/python-value.c
index 092c520..f3d6284 100644
--- a/gdb/testsuite/gdb.python/python-value.c
+++ b/gdb/testsuite/gdb.python/python-value.c
@@ -44,6 +44,8 @@ main (int argc, char *argv[])
   struct s s;
   union u u;
   PTR x = &s;
+  char st[17] = "divide et impera";
+  char nullst[17] = "divide\0et\0impera";
 
   s.a = 3;
   s.b = 5;
diff --git a/gdb/testsuite/gdb.python/python-value.exp b/gdb/testsuite/gdb.python/python-value.exp
index 3421406..59d13c6 100644
--- a/gdb/testsuite/gdb.python/python-value.exp
+++ b/gdb/testsuite/gdb.python/python-value.exp
@@ -234,6 +234,25 @@ proc test_value_in_inferior {} {
 
   # Test address attribute
   gdb_test "python print 'result =', arg0.address" "= 0x\[\[:xdigit:\]\]+" "Test address attribute"
+
+  # Test string fetches,  both partial and whole.
+  gdb_test "print st" "\"divide et impera\""
+  gdb_py_test_silent_cmd "python st = gdb.history (0)" "get value from history" 1
+  gdb_test "python print st.string ()"  "divide et impera"  "Test string with no length"
+  gdb_test "python print st.string (length = -1)" "divide et impera" "Test string (length = -1) is all of the string"
+  gdb_test "python print st.string (length = 6)" "divide"
+  gdb_test "python print \"---\"+st.string (length = 0)+\"---\"" "------" "Test string (length = 0) is empty"
+  gdb_test "python print len(st.string (length = 0))" "0" "Test length is 0"
+
+
+  # Fetch a string that has embedded nulls.
+  gdb_test "print nullst" "\"divide\\\\000et\\\\000impera\".*"
+  gdb_py_test_silent_cmd "python nullst = gdb.history (0)" "get value from history" 1
+  gdb_test "python print nullst.string ()" "divide" "Test string to first null"
+  # Python cannot print strings that contain the null (\0) character.
+  # For the purposes of this test, use repr()
+  gdb_py_test_silent_cmd "python nullst = nullst.string (length = 9)" "get string beyond null" 1
+  gdb_test "python print repr(nullst)" "u'divide\\\\x00et'"
 }
 
 proc test_value_after_death {} {

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2009-04-15 14:52 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-04-06 16:54 [python][patch] Add options length parameter to value.string(...) Phil Muldoon
2009-04-06 22:14 ` Tom Tromey
2009-04-06 22:33   ` Thiago Jung Bauermann
2009-04-09  9:58     ` Phil Muldoon
2009-04-09 16:08       ` Thiago Jung Bauermann
2009-04-09 16:28         ` Phil Muldoon
2009-04-09 18:46           ` Thiago Jung Bauermann
2009-04-09 20:28             ` Phil Muldoon
2009-04-09 20:39               ` Thiago Jung Bauermann
2009-04-09 16:12   ` Phil Muldoon
2009-04-09 20:39     ` Thiago Jung Bauermann
2009-04-09 21:29       ` Phil Muldoon
2009-04-13 22:08         ` Tom Tromey
2009-04-15  9:13           ` Phil Muldoon
2009-04-15  9:37             ` Phil Muldoon
2009-04-15 12:37               ` Tom Tromey
2009-04-15 14:52           ` Phil Muldoon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).