From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2126) id 753173858C53; Tue, 20 Feb 2024 20:57:28 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 753173858C53 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1708462648; bh=EDZJXNtcBWeSpfmBGWcOhSnr9pOWMx57brPpchDmQIA=; h=From:To:Subject:Date:From; b=fwD/QOC+88eozKTBoxX4oINGI/ZPTt1QdlnCreM0aUbTkXEqp3aYDi/SQvJrJi0OV XVaOPlL0Mtoc2fZWeQkoeH8ILsBN2MjFRxrTS02hwA1enzsDFQISwsGLgb/tmKg2Te BUmqV9UsM2aSJnRVNJj1ZnIigqOBWaB0VwD4IN8k= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Tom Tromey To: gdb-cvs@sourceware.org Subject: [binutils-gdb] Rewrite Rust slice type handling X-Act-Checkin: binutils-gdb X-Git-Author: Tom Tromey X-Git-Refname: refs/heads/master X-Git-Oldrev: 94a75b0363b1e09416e9bd24cac72d98864688d8 X-Git-Newrev: b0dd661fa16a424f059b1e1d80e779508b1a9a12 Message-Id: <20240220205728.753173858C53@sourceware.org> Date: Tue, 20 Feb 2024 20:57:28 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3Db0dd661fa16a= 424f059b1e1d80e779508b1a9a12 commit b0dd661fa16a424f059b1e1d80e779508b1a9a12 Author: Tom Tromey Date: Tue Jan 30 10:06:46 2024 -0700 Rewrite Rust slice type handling =20 This patch rewrites the handling of slice types in Rust. =20 More recent versions of the Rust compiler changed how unsized types were emitted, letting gdb inspect them more nicely. However, gdb did not do this, and in fact treated all such types as if they were slices of arrays, which is incorrect. =20 This patch rewrites this handling and removes the restriction that unsized types must be array slices. I've added a comment explaining how unsized types are represented to rust-lang.c as well. =20 I looked into a different approach, namely changing the DWARF reader to fix up slice types to have a dynamic type. 
However, the approach taken here turned out to be simpler. =20 Tested on x86-64 Fedora 38 with a variety of Rust compiler versions. =20 Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=3D30330 Diff: --- gdb/rust-lang.c | 239 ++++++++++++++++++++++++++------= ---- gdb/rust-lang.h | 13 +- gdb/testsuite/gdb.rust/simple.exp | 2 +- gdb/testsuite/gdb.rust/unsized.exp | 2 +- gdb/testsuite/gdb.rust/unsized2.exp | 59 +++++++++ gdb/testsuite/gdb.rust/unsized2.rs | 67 ++++++++++ 6 files changed, 310 insertions(+), 72 deletions(-) diff --git a/gdb/rust-lang.c b/gdb/rust-lang.c index e0b5a887a23..ab537cc9752 100644 --- a/gdb/rust-lang.c +++ b/gdb/rust-lang.c @@ -153,9 +153,9 @@ rust_tuple_struct_type_p (struct type *type) return type->num_fields () > 0 && rust_underscore_fields (type); } =20 -/* See rust-lang.h. */ +/* Return true if TYPE is "slice-like"; false otherwise. */ =20 -bool +static bool rust_slice_type_p (const struct type *type) { if (type->code () =3D=3D TYPE_CODE_STRUCT @@ -269,6 +269,125 @@ rust_get_trait_object_pointer (struct value *value) return value_cast (pointer_type, value_field (value, 1 - vtable_field)); } =20 +/* Find and possibly rewrite the unsized part of a slice-like type. + + This function has two modes. If the out parameters are both NULL, + it will return true if an unsized member of IN_TYPE is found. + + If the out parameters are both non-NULL, it will do the same, but + will also rewrite the unsized member's type to be an array of the + appropriate type. BOUND is the upper bound of the new array. + + See convert_slice to understand the different kinds of unsized type + and how they are represented. 
+*/ +static bool +rewrite_slice_type (struct type *in_type, struct type **new_type, + LONGEST bound, ULONGEST *additional_length) +{ + if (in_type->code () !=3D TYPE_CODE_STRUCT) + return false; + + unsigned nfields =3D in_type->num_fields (); + if (nfields =3D=3D 0) + return false; + + struct type *rewritten; + const field &field =3D in_type->field (nfields - 1); + struct type *field_type =3D field.type (); + if (field.loc_kind () =3D=3D FIELD_LOC_KIND_BITPOS + && field.loc_bitpos () =3D=3D 8 * in_type->length ()) + { + if (additional_length =3D=3D nullptr) + return true; + rewritten =3D lookup_array_range_type (field_type, 0, bound); + *additional_length =3D rewritten->length (); + } + else + { + if (!rewrite_slice_type (field_type, &rewritten, bound, + additional_length)) + return false; + if (additional_length =3D=3D nullptr) + return true; + } + + struct type *result =3D copy_type (in_type); + result->copy_fields (in_type); + result->field (nfields - 1).set_type (rewritten); + result->set_length (result->length () + *additional_length); + + *new_type =3D result; + return true; +} + +/* Convert a Rust slice to its "true" representation. + + The Rust compiler emits slices as "fat" pointers like: + + struct { payload *data_ptr; usize length } + + Any sort of unsized type is emitted this way. + + If 'payload' is a struct type, then it must be searched to see if + the trailing field is unsized. This has to be done recursively (as + in, if the final field in the struct type itself has struct type, + then that type must be searched). In this scenario, the unsized + field can be recognized because it does not contribute to the + type's size. + + If 'payload' does not have a trailing unsized type, or if it is not + of struct type, then this slice is "array-like". In this case + rewriting will return an array. 
+*/ +static struct value * +convert_slice (struct value *val) +{ + struct type *type =3D check_typedef (val->type ()); + /* This must have been checked by the caller. */ + gdb_assert (rust_slice_type_p (type)); + + struct value *len =3D value_struct_elt (&val, {}, "length", nullptr, + "slice"); + LONGEST llen =3D value_as_long (len); + + struct value *ptr =3D value_struct_elt (&val, {}, "data_ptr", nullptr, + "slice"); + struct type *original_type =3D ptr->type ()->target_type (); + ULONGEST new_length_storage =3D 0; + struct type *new_type =3D nullptr; + if (!rewrite_slice_type (original_type, &new_type, llen - 1, + &new_length_storage)) + new_type =3D lookup_array_range_type (original_type, 0, llen - 1); + + struct value *result =3D value::allocate_lazy (new_type); + result->set_lval (lval_memory); + result->set_address (value_as_address (ptr)); + result->fetch_lazy (); + + return result; +} + +/* If TYPE is an array-like slice, return the element type; otherwise + return NULL. */ +static struct type * +rust_array_like_element_type (struct type *type) +{ + /* Caller must check this. */ + gdb_assert (rust_slice_type_p (type)); + for (int i =3D 0; i < type->num_fields (); ++i) + { + if (strcmp (type->field (i).name (), "data_ptr") =3D=3D 0) + { + struct type *base_type =3D type->field (i).type ()->target_type (); + if (rewrite_slice_type (base_type, nullptr, 0, nullptr)) + return nullptr; + return base_type; + } + } + return nullptr; +} + =0C =20 /* See language.h. */ @@ -324,57 +443,40 @@ static const struct generic_val_print_decorations rus= t_decorations =3D struct value * rust_slice_to_array (struct value *val) { - struct type *type =3D check_typedef (val->type ()); - /* This must have been checked by the caller. 
*/ - gdb_assert (rust_slice_type_p (type)); - - struct value *base =3D value_struct_elt (&val, {}, "data_ptr", NULL, - "slice"); - struct value *len =3D value_struct_elt (&val, {}, "length", NULL, "slice= "); - LONGEST llen =3D value_as_long (len); - - struct type *elt_type =3D base->type ()->target_type (); - struct type *array_type =3D lookup_array_range_type (elt_type, 0, - llen - 1); - struct value *array =3D value::allocate_lazy (array_type); - array->set_lval (lval_memory); - array->set_address (value_as_address (base)); - - return array; + val =3D convert_slice (val); + if (val->type ()->code () !=3D TYPE_CODE_ARRAY) + return nullptr; + return val; } =20 /* Helper function to print a slice. */ =20 -static void -rust_val_print_slice (struct value *val, struct ui_file *stream, int recur= se, - const struct value_print_options *options) +void +rust_language::val_print_slice + (struct value *val, struct ui_file *stream, int recurse, + const struct value_print_options *options) const { - struct value *base =3D value_struct_elt (&val, {}, "data_ptr", NULL, - "slice"); - struct value *len =3D value_struct_elt (&val, {}, "length", NULL, "slice= "); + struct type *orig_type =3D check_typedef (val->type ()); =20 + val =3D convert_slice (val); struct type *type =3D check_typedef (val->type ()); - if (strcmp (type->name (), "&str") =3D=3D 0) - val_print_string (base->type ()->target_type (), "UTF-8", - value_as_address (base), value_as_long (len), stream, - options); - else - { - LONGEST llen =3D value_as_long (len); =20 - type_print (val->type (), "", stream, -1); - gdb_printf (stream, " "); - - if (llen =3D=3D 0) - gdb_printf (stream, "[]"); - else + /* &str is handled here; but for all other slice types it is fine to + simply print the contents. 
*/ + if (orig_type->name () !=3D nullptr + && strcmp (orig_type->name (), "&str") =3D=3D 0) + { + LONGEST low_bound, high_bound; + if (get_array_bounds (type, &low_bound, &high_bound)) { - struct value *array =3D rust_slice_to_array (val); - array->fetch_lazy (); - generic_value_print (array, stream, recurse, options, - &rust_decorations); + val_print_string (type->target_type (), "UTF-8", + val->address (), high_bound - low_bound + 1, + stream, options); + return; } } + + value_print_inner (val, stream, recurse, options); } =20 /* See rust-lang.h. */ @@ -390,7 +492,7 @@ rust_language::val_print_struct =20 if (rust_slice_type_p (type)) { - rust_val_print_slice (val, stream, recurse, options); + val_print_slice (val, stream, recurse, options); return; } =20 @@ -1180,6 +1282,7 @@ rust_subscript (struct type *expect_type, struct expr= ession *exp, low =3D value_as_long (rhs); =20 struct type *type =3D check_typedef (lhs->type ()); + struct type *orig_type =3D type; if (noside =3D=3D EVAL_AVOID_SIDE_EFFECTS) { struct type *base_type =3D nullptr; @@ -1187,16 +1290,9 @@ rust_subscript (struct type *expect_type, struct exp= ression *exp, base_type =3D type->target_type (); else if (rust_slice_type_p (type)) { - for (int i =3D 0; i < type->num_fields (); ++i) - { - if (strcmp (type->field (i).name (), "data_ptr") =3D=3D 0) - { - base_type =3D type->field (i).type ()->target_type (); - break; - } - } + base_type =3D rust_array_like_element_type (type); if (base_type =3D=3D nullptr) - error (_("Could not find 'data_ptr' in slice type")); + error (_("Cannot subscript non-array-like slice")); } else if (type->code () =3D=3D TYPE_CODE_PTR) base_type =3D type->target_type (); @@ -1227,6 +1323,12 @@ rust_subscript (struct type *expect_type, struct exp= ression *exp, LONGEST low_bound; struct value *base; =20 + if (rust_slice_type_p (type)) + { + lhs =3D convert_slice (lhs); + type =3D check_typedef (lhs->type ()); + } + if (type->code () =3D=3D TYPE_CODE_ARRAY) { base =3D lhs; @@ 
-1236,15 +1338,6 @@ rust_subscript (struct type *expect_type, struct exp= ression *exp, error (_("Found array with non-zero lower bound")); ++high_bound; } - else if (rust_slice_type_p (type)) - { - struct value *len; - - base =3D value_struct_elt (&lhs, {}, "data_ptr", NULL, "slice"); - len =3D value_struct_elt (&lhs, {}, "length", NULL, "slice"); - low_bound =3D 0; - high_bound =3D value_as_long (len); - } else if (type->code () =3D=3D TYPE_CODE_PTR) { base =3D lhs; @@ -1284,9 +1377,11 @@ rust_subscript (struct type *expect_type, struct exp= ression *exp, usize =3D language_lookup_primitive_type (exp->language_defn, exp->gdbarch, "usize"); - const char *new_name =3D ((type !=3D nullptr - && rust_slice_type_p (type)) - ? type->name () : "&[*gdb*]"); + /* Preserve the name for slice-of-slice; this lets + string-printing work a bit more nicely. */ + const char *new_name =3D ((orig_type !=3D nullptr + && rust_slice_type_p (orig_type)) + ? orig_type->name () : "&[*gdb*]"); =20 slice =3D rust_slice_type (new_name, result->type (), usize); =20 @@ -1477,7 +1572,11 @@ rust_structop::evaluate (struct type *expect_type, } } else - result =3D value_struct_elt (&lhs, {}, field_name, NULL, "structure"); + { + if (rust_slice_type_p (type)) + lhs =3D convert_slice (lhs); + result =3D value_struct_elt (&lhs, {}, field_name, NULL, "structure"= ); + } if (noside =3D=3D EVAL_AVOID_SIDE_EFFECTS) result =3D value::zero (result->type (), result->lval ()); return result; @@ -1677,6 +1776,16 @@ rust_language::emitchar (int ch, struct type *chtype, =20 /* See language.h. */ =20 +bool +rust_language::is_array_like (struct type *type) const +{ + if (!rust_slice_type_p (type)) + return false; + return rust_array_like_element_type (type) !=3D nullptr; +} + +/* See language.h. 
*/ + bool rust_language::is_string_type_p (struct type *type) const { diff --git a/gdb/rust-lang.h b/gdb/rust-lang.h index e76a63ee037..9ae5961e9ac 100644 --- a/gdb/rust-lang.h +++ b/gdb/rust-lang.h @@ -34,9 +34,6 @@ extern bool rust_tuple_type_p (struct type *type); /* Return true if TYPE is a tuple struct type; otherwise false. */ extern bool rust_tuple_struct_type_p (struct type *type); =20 -/* Return true if TYPE is a slice type, otherwise false. */ -extern bool rust_slice_type_p (const struct type *type); - /* Given a block, find the name of the block's crate. Returns an empty stringif no crate name can be found. */ extern std::string rust_crate_for_block (const struct block *block); @@ -196,8 +193,7 @@ public: =20 /* See language.h. */ =20 - bool is_array_like (struct type *type) const override - { return rust_slice_type_p (type); } + bool is_array_like (struct type *type) const override; =20 /* See language.h. */ =20 @@ -211,6 +207,13 @@ public: =20 private: =20 + /* Helper for value_print_inner, arguments are as for that function. + Prints a slice. */ + + void val_print_slice (struct value *val, struct ui_file *stream, + int recurse, + const struct value_print_options *options) const; + /* Helper for value_print_inner, arguments are as for that function. Prints structs and untagged unions. */ =20 diff --git a/gdb/testsuite/gdb.rust/simple.exp b/gdb/testsuite/gdb.rust/sim= ple.exp index 1e6fc94400e..7f5fbad7a3f 100644 --- a/gdb/testsuite/gdb.rust/simple.exp +++ b/gdb/testsuite/gdb.rust/simple.exp @@ -317,7 +317,7 @@ proc test_one_slice {svar length base range} { global hex =20 # Just accept any array here. 
- set result " =3D &\\\[.*\\\] \\\[.*\\\]" + set result " =3D \\\[.*\\\]" =20 gdb_test "print $svar" $result gdb_test "print &${base}\[${range}\]" $result diff --git a/gdb/testsuite/gdb.rust/unsized.exp b/gdb/testsuite/gdb.rust/un= sized.exp index 94750896288..fab655790e6 100644 --- a/gdb/testsuite/gdb.rust/unsized.exp +++ b/gdb/testsuite/gdb.rust/unsized.exp @@ -33,6 +33,6 @@ if {![runto ${srcfile}:$line]} { gdb_test "ptype us" " =3D .*V<\\\[u8\\\]>.*" =20 if {[rust_at_least 1.61]} { - gdb_test "print us2" " =3D .*Box<.*> \\\[1, 2, 3\\\]" + gdb_test "print us2" " =3D \\\[1, 2, 3\\\]" gdb_test "ptype us2" "type =3D .*" } diff --git a/gdb/testsuite/gdb.rust/unsized2.exp b/gdb/testsuite/gdb.rust/u= nsized2.exp new file mode 100644 index 00000000000..5b7be45d8ce --- /dev/null +++ b/gdb/testsuite/gdb.rust/unsized2.exp @@ -0,0 +1,59 @@ +# Copyright (C) 2024 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Test the handling of unsized types. 
+ +load_lib rust-support.exp +require allow_rust_tests +require {can_compile rust} +require {rust_at_least 1.61} + +standard_testfile .rs +if {[prepare_for_testing "failed to prepare" $testfile $srcfile {debug rus= t}]} { + return -1 +} + +set line [gdb_get_line_number "set breakpoint here"] +if {![runto ${srcfile}:$line]} { + untested "could not run to breakpoint" + return -1 +} + +set base_value \ + [string cat \ + "MaybeUnsizedStruct<.*?>" \ + [string_to_regexp " {regular: 23, rest: \[5, 6, 7\]}"]] + +gdb_test "print *sized_struct" \ + " =3D .*$base_value" +gdb_test "print *nested_sized_struct" \ + " =3D .*MaybeUnsizedStruct<.*?> {regular: 91, rest: .*$base_value}" + +gdb_test "print unsized_struct" \ + " =3D .*$base_value" +gdb_test "print *reference" \ + " =3D .*$base_value" + +gdb_test "print nested_unsized_struct" \ + " =3D .*MaybeUnsizedStruct<.*?> {regular: 91, rest: .*$base_value}" + +gdb_test "print alpha" \ + " =3D .*MaybeUnsizedStruct2<.*?> {value: \\\[97, 98, 99, 0\\\]}" +gdb_test "print beta" \ + " =3D .*MaybeUnsizedStruct2<.*?> {value: \\\[97, 98, 99, 0\\\]}" + +gdb_test "print sized_struct.regular" " =3D 23" +gdb_test "print nested_unsized_struct.regular" " =3D 91" +gdb_test "print unsized_struct.rest\[1\]" " =3D 6" diff --git a/gdb/testsuite/gdb.rust/unsized2.rs b/gdb/testsuite/gdb.rust/un= sized2.rs new file mode 100644 index 00000000000..980a5fe92f3 --- /dev/null +++ b/gdb/testsuite/gdb.rust/unsized2.rs @@ -0,0 +1,67 @@ +// Copyright (C) 2024 Free Software Foundation, Inc. + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +#![allow(dead_code)] +#![allow(unused_variables)] +#![allow(unused_assignments)] + +fn ignore<T>(x: T) { } + +// A generic struct that is unsized if T is unsized. +pub struct MaybeUnsizedStruct<T: ?Sized> { + pub regular: u32, + pub rest: T, +} + +// Same but without an ordinary part. +pub struct MaybeUnsizedStruct2<T: ?Sized> { + value: T, +} + +fn main() { + // This struct is still sized because T is a fixed-length array + let sized_struct =3D &MaybeUnsizedStruct { + regular: 23, + rest: [5, 6, 7], + }; + + // This struct is still sized because T is sized + let nested_sized_struct =3D &MaybeUnsizedStruct { + regular: 91, + rest: MaybeUnsizedStruct { + regular: 23, + rest: [5, 6, 7], + }, + }; + + // This will be a fat pointer, containing the length of the final + // field. + let unsized_struct: &MaybeUnsizedStruct<[u32]> =3D sized_struct; + + // This will also be a fat pointer, containing the length of the + // final field. + let nested_unsized_struct: + &MaybeUnsizedStruct<MaybeUnsizedStruct<[u32]>> =3D nested_sized_st= ruct; + + let alpha: MaybeUnsizedStruct2<[u8; 4]> =3D MaybeUnsizedStruct2 { valu= e: *b"abc\0" }; + let beta: &MaybeUnsizedStruct2<[u8]> =3D &alpha; + + let reference =3D &unsized_struct; + + ignore(sized_struct); // set breakpoint here + ignore(nested_sized_struct); + ignore(unsized_struct); + ignore(nested_unsized_struct); +}