public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] gdb/gcore: interrupt all threads before generating the corefile
@ 2022-10-06  9:50 Lancelot SIX
  2022-10-06 16:31 ` John Baldwin
  2022-10-11 18:44 ` Pedro Alves
  0 siblings, 2 replies; 4+ messages in thread
From: Lancelot SIX @ 2022-10-06  9:50 UTC (permalink / raw)
  To: gdb-patches; +Cc: lsix, Lancelot SIX

In non-stop mode, if the user tries to generate a core dump (using the
gcore command) while some threads are running, a non-helpful error
message is shown.

Lets consider the following session as an example (debugging the test
program included in this patch):

    (gdb) set non-stop on
    (gdb) b 37
    (gdb) r
    Thread 1 "gcore-nonstop" hit Breakpoint 1, main () at gcore-nonstop.c:39
    (gdb) info thread
       Id   Target Id                                          Frame
     * 1    Thread 0x7ffff7d7a740 (LWP 431838) "gcore-nonstop" main () * at gcore-nonstop.c:39
       2    Thread 0x7ffff7d79640 (LWP 431841) "gcore-nonstop" (running)
    (gdb) gcore
    Couldn't get registers: No such process.

The reported error ("No such process") does not help the user understand
what happens.  This is due to the fact that we cannot access the
registers of a running thread.  Even if we ignore this error, generating
a core dump while any thread might update memory would most likely
result in a core file with an inconsistent view of the process' memory.

To solve this, this patch proposes to change the gcore command so it
first stops all running threads before generating the corefile, and then
resumes them in their previous state.

The patch proposes to stop all threads across all the inferiors (not
just the current one) just in case the memory space is shared between
inferiors.

To achieve this, this patch exposes the restart_threads function in infrun.h
(used to be local to infrun.c).  We also allow the first parameter
(event_thread) to be nullptr as it is possible that the gcore command is
called while all threads are running, in which case we want all threads
to be restarted at the end of the procedure.

Tested on x86_64.
---
 gdb/gcore.c                              |  8 +++
 gdb/infrun.c                             | 16 ++----
 gdb/infrun.h                             |  9 ++++
 gdb/testsuite/gdb.base/gcore-nonstop.c   | 44 +++++++++++++++++
 gdb/testsuite/gdb.base/gcore-nonstop.exp | 62 ++++++++++++++++++++++++
 5 files changed, 128 insertions(+), 11 deletions(-)
 create mode 100644 gdb/testsuite/gdb.base/gcore-nonstop.c
 create mode 100644 gdb/testsuite/gdb.base/gcore-nonstop.exp

diff --git a/gdb/gcore.c b/gdb/gcore.c
index 519007714e5..664318b4161 100644
--- a/gdb/gcore.c
+++ b/gdb/gcore.c
@@ -34,6 +34,7 @@
 #include "regset.h"
 #include "gdb_bfd.h"
 #include "readline/tilde.h"
+#include "infrun.h"
 #include <algorithm>
 #include "gdbsupport/gdb_unlinker.h"
 #include "gdbsupport/byte-vector.h"
@@ -131,6 +132,10 @@ gcore_command (const char *args, int from_tty)
   if (!target_has_execution ())
     noprocess ();
 
+  scoped_restore_current_thread restore_current_thread;
+  scoped_disable_commit_resumed disable_commit_resume ("generating coredump");
+  stop_all_threads ("generating coredump");
+
   if (args && *args)
     corefilename.reset (tilde_expand (args));
   else
@@ -161,6 +166,9 @@ gcore_command (const char *args, int from_tty)
     }
 
   gdb_printf ("Saved corefile %s\n", corefilename.get ());
+
+  restart_threads (nullptr, nullptr);
+  disable_commit_resume.reset_and_commit ();
 }
 
 static enum bfd_architecture
diff --git a/gdb/infrun.c b/gdb/infrun.c
index 1957e8020dd..34fcb2f92dd 100644
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -96,9 +96,6 @@ static void resume (gdb_signal sig);
 
 static void wait_for_inferior (inferior *inf);
 
-static void restart_threads (struct thread_info *event_thread,
-			     inferior *inf = nullptr);
-
 static bool start_step_over (void);
 
 static bool step_over_info_valid_p (void);
@@ -5889,18 +5886,15 @@ handle_inferior_event (struct execution_control_state *ecs)
     }
 }
 
-/* Restart threads back to what they were trying to do back when we
-   paused them (because of an in-line step-over or vfork, for example).
-   The EVENT_THREAD thread is ignored (not restarted).
-
-   If INF is non-nullptr, only resume threads from INF.  */
+/* See infrun.h.  */
 
-static void
+void
 restart_threads (struct thread_info *event_thread, inferior *inf)
 {
   INFRUN_SCOPED_DEBUG_START_END ("event_thread=%s, inf=%d",
-				 event_thread->ptid.to_string ().c_str (),
-				 inf != nullptr ? inf->num : -1);
+				 (event_thread != nullptr
+				  ? event_thread->ptid.to_string ().c_str ()
+				  : "None"), inf != nullptr ? inf->num : -1);
 
   gdb_assert (!step_over_info_valid_p ());
 
diff --git a/gdb/infrun.h b/gdb/infrun.h
index 0c7c55eabec..81d00f6da7e 100644
--- a/gdb/infrun.h
+++ b/gdb/infrun.h
@@ -173,6 +173,15 @@ extern void nullify_last_target_wait_ptid ();
    all threads of all inferiors.  */
 extern void stop_all_threads (const char *reason, inferior *inf = nullptr);
 
+/* Restart threads back to what they were trying to do back when we
+   paused them (because of an in-line step-over or vfork, for example).
+   The EVENT_THREAD thread, if non-nullptr, is ignored (not restarted).
+
+   If INF is non-nullptr, only resume threads from INF.  */
+
+extern void restart_threads (struct thread_info *event_thread,
+			     inferior *inf = nullptr);
+
 extern void prepare_for_detach (void);
 
 extern void fetch_inferior_event ();
diff --git a/gdb/testsuite/gdb.base/gcore-nonstop.c b/gdb/testsuite/gdb.base/gcore-nonstop.c
new file mode 100644
index 00000000000..191a1a26849
--- /dev/null
+++ b/gdb/testsuite/gdb.base/gcore-nonstop.c
@@ -0,0 +1,44 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2022 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+
+static pthread_barrier_t barrier;
+
+static void *
+worker_func (void *)
+{
+  pthread_barrier_wait (&barrier);
+  return NULL;
+}
+
+int
+main (void)
+{
+  pthread_t worker_thread;
+  pthread_barrier_init (&barrier, NULL, 2);
+
+  pthread_create (&worker_thread, NULL, worker_func, NULL);
+
+  /* Break here.  */
+
+  pthread_barrier_wait (&barrier);
+  pthread_join (worker_thread, NULL);
+  pthread_barrier_destroy (&barrier);
+
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.base/gcore-nonstop.exp b/gdb/testsuite/gdb.base/gcore-nonstop.exp
new file mode 100644
index 00000000000..6c9ed4ad342
--- /dev/null
+++ b/gdb/testsuite/gdb.base/gcore-nonstop.exp
@@ -0,0 +1,62 @@
+# Copyright 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This testcase checks that when in non-stop mode with some threads running
+# the gcore command can interrupt all threads, generate a core dump and
+# restart threads as required.
+
+standard_testfile
+
+if { [prepare_for_testing "failed to prepare" \
+	${testfile} ${srcfile} {threads debug}] } {
+    return
+}
+
+gdb_test_no_output "set non-stop on"
+set lineno [gdb_get_line_number "Break here"]
+if { ![runto $lineno] } {
+    return
+}
+
+# We should be stopped in thread 1 while thread 2 is running
+gdb_test_sequence "info threads" "info threads" {
+    {Id\s+Target Id\s+Frame}
+    {\*\s+1[^\n]*\n}
+    {\s+2\s+[^\n]*\(running\)[^\n]*\n}
+}
+
+set corefile [standard_output_file "corefile"]
+if {![gdb_gcore_cmd $corefile "generate corefile"]} {
+  # gdb_gcore_cmd did would generate a unsupported.
+  return
+}
+
+# After the corefile is generated, thread 2 should be back running
+# and thread 1 should still be selectd
+gdb_test_sequence "info threads" "correct thread selection after gcore" {
+    {Id\s+Target Id\s+Frame}
+    {\*\s+1[^\n]*\n}
+    {\s+2\s+[^\n]*\(running\)[^\n]*\n}
+}
+
+clean_restart $binfile
+gdb_test "core-file $corefile" "Core was generated by.*" "load corefile"
+
+# The corefile has the 2 threads
+gdb_test_sequence "info threads" "threads in corefile" {
+    {Id\s+Target Id\s+Frame}
+    {\s+1\s+Thread[^\n]*\n}
+    {\s+2\s+Thread[^\n]*\n}
+}

base-commit: a13886e2198beb78b81c59839043b021ce6df78a
-- 
2.34.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] gdb/gcore: interrupt all threads before generating the corefile
  2022-10-06  9:50 [PATCH] gdb/gcore: interrupt all threads before generating the corefile Lancelot SIX
@ 2022-10-06 16:31 ` John Baldwin
  2022-10-11 18:44 ` Pedro Alves
  1 sibling, 0 replies; 4+ messages in thread
From: John Baldwin @ 2022-10-06 16:31 UTC (permalink / raw)
  To: Lancelot SIX, gdb-patches; +Cc: lsix

On 10/6/22 2:50 AM, Lancelot SIX via Gdb-patches wrote:
> In non-stop mode, if the user tries to generate a core dump (using the
> gcore command) while some threads are running, a non-helpful error
> message is shown.
> 
> Lets consider the following session as an example (debugging the test
> program included in this patch):
> 
>      (gdb) set non-stop on
>      (gdb) b 37
>      (gdb) r
>      Thread 1 "gcore-nonstop" hit Breakpoint 1, main () at gcore-nonstop.c:39
>      (gdb) info thread
>         Id   Target Id                                          Frame
>       * 1    Thread 0x7ffff7d7a740 (LWP 431838) "gcore-nonstop" main () * at gcore-nonstop.c:39
>         2    Thread 0x7ffff7d79640 (LWP 431841) "gcore-nonstop" (running)
>      (gdb) gcore
>      Couldn't get registers: No such process.
> 
> The reported error ("No such process") does not help the user understand
> what happens.  This is due to the fact that we cannot access the
> registers of a running thread.  Even if we ignore this error, generating
> a core dump while any thread might update memory would most likely
> result in a core file with an inconsistent view of the process' memory.
> 
> To solve this, this patch proposes to change the gcore command so it
> first stops all running threads before generating the corefile, and then
> resumes them in their previous state.
> 
> The patch proposes to stop all threads across all the inferiors (not
> just the current one) just in case the memory space is shared between
> inferiors.
I agree with the idea in concept (gcore should stop all threads so it can
write out a consistent snapshot).

-- 
John Baldwin

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] gdb/gcore: interrupt all threads before generating the corefile
  2022-10-06  9:50 [PATCH] gdb/gcore: interrupt all threads before generating the corefile Lancelot SIX
  2022-10-06 16:31 ` John Baldwin
@ 2022-10-11 18:44 ` Pedro Alves
  2022-10-12 17:12   ` Lancelot SIX
  1 sibling, 1 reply; 4+ messages in thread
From: Pedro Alves @ 2022-10-11 18:44 UTC (permalink / raw)
  To: Lancelot SIX, gdb-patches; +Cc: lsix

On 2022-10-06 10:50 a.m., Lancelot SIX via Gdb-patches wrote:
> In non-stop mode, if the user tries to generate a core dump (using the
> gcore command) while some threads are running, a non-helpful error
> message is shown.
> 
> Lets consider the following session as an example (debugging the test
> program included in this patch):
> 
>     (gdb) set non-stop on
>     (gdb) b 37
>     (gdb) r
>     Thread 1 "gcore-nonstop" hit Breakpoint 1, main () at gcore-nonstop.c:39
>     (gdb) info thread
>        Id   Target Id                                          Frame
>      * 1    Thread 0x7ffff7d7a740 (LWP 431838) "gcore-nonstop" main () * at gcore-nonstop.c:39

Weird " * " after "main ()".

>        2    Thread 0x7ffff7d79640 (LWP 431841) "gcore-nonstop" (running)
>     (gdb) gcore
>     Couldn't get registers: No such process.
> 
> The reported error ("No such process") does not help the user understand
> what happens.  This is due to the fact that we cannot access the
> registers of a running thread.  Even if we ignore this error, generating
> a core dump while any thread might update memory would most likely
> result in a core file with an inconsistent view of the process' memory.
> 
> To solve this, this patch proposes to change the gcore command so it
> first stops all running threads before generating the corefile, and then
> resumes them in their previous state.
> 
> The patch proposes to stop all threads across all the inferiors (not
> just the current one) just in case the memory space is shared between
> inferiors.

The memory space may be shared with processes we're not debugging, too, though.

Seems odd to stop threads running on other targets, if we can easily help it.
E.g., you have 10 inferiors loaded, all running on different remote targets.
Stopping all inferiors means we stop the inferiors running on all those different
remote targets.

> 
> To achieve this, this patch exposes the restart_threads function in infrun.h
> (used to be local to infrun.c).  We also allow the first parameter
> (event_thread) to be nullptr as it is possible that the gcore command is
> called while all threads are running, in which case we want all threads
> to be restarted at the end of the procedure.
> 
> Tested on x86_64.
> ---
>  gdb/gcore.c                              |  8 +++
>  gdb/infrun.c                             | 16 ++----
>  gdb/infrun.h                             |  9 ++++
>  gdb/testsuite/gdb.base/gcore-nonstop.c   | 44 +++++++++++++++++
>  gdb/testsuite/gdb.base/gcore-nonstop.exp | 62 ++++++++++++++++++++++++
>  5 files changed, 128 insertions(+), 11 deletions(-)
>  create mode 100644 gdb/testsuite/gdb.base/gcore-nonstop.c
>  create mode 100644 gdb/testsuite/gdb.base/gcore-nonstop.exp
> 
> diff --git a/gdb/gcore.c b/gdb/gcore.c
> index 519007714e5..664318b4161 100644
> --- a/gdb/gcore.c
> +++ b/gdb/gcore.c
> @@ -34,6 +34,7 @@
>  #include "regset.h"
>  #include "gdb_bfd.h"
>  #include "readline/tilde.h"
> +#include "infrun.h"
>  #include <algorithm>
>  #include "gdbsupport/gdb_unlinker.h"
>  #include "gdbsupport/byte-vector.h"
> @@ -131,6 +132,10 @@ gcore_command (const char *args, int from_tty)
>    if (!target_has_execution ())
>      noprocess ();
>  
> +  scoped_restore_current_thread restore_current_thread;
> +  scoped_disable_commit_resumed disable_commit_resume ("generating coredump");
> +  stop_all_threads ("generating coredump");
> +
>    if (args && *args)
>      corefilename.reset (tilde_expand (args));
>    else
> @@ -161,6 +166,9 @@ gcore_command (const char *args, int from_tty)
>      }
>  
>    gdb_printf ("Saved corefile %s\n", corefilename.get ());

So if something goes wrong dumping core, and we throw an error, we end
up with all threads internally stopped, while "info threads" will show
all threads as "running".  That could be fixed with a scoped_finish_thread_state.

(Alternatively we could instead just error out if a thread is running.)

If we go with the auto-stop, then this should be documented in the manual, and
get a NEWS entry, IMHO.

> +
> +  restart_threads (nullptr, nullptr);
> +  disable_commit_resume.reset_and_commit ();
>  }
>  
>  static enum bfd_architecture
> diff --git a/gdb/infrun.c b/gdb/infrun.c
> index 1957e8020dd..34fcb2f92dd 100644
> --- a/gdb/infrun.c
> +++ b/gdb/infrun.c
> @@ -96,9 +96,6 @@ static void resume (gdb_signal sig);
>  
>  static void wait_for_inferior (inferior *inf);
>  
> -static void restart_threads (struct thread_info *event_thread,
> -			     inferior *inf = nullptr);
> -
>  static bool start_step_over (void);
>  
>  static bool step_over_info_valid_p (void);
> @@ -5889,18 +5886,15 @@ handle_inferior_event (struct execution_control_state *ecs)
>      }
>  }
>  
> -/* Restart threads back to what they were trying to do back when we
> -   paused them (because of an in-line step-over or vfork, for example).
> -   The EVENT_THREAD thread is ignored (not restarted).
> -
> -   If INF is non-nullptr, only resume threads from INF.  */
> +/* See infrun.h.  */
>  
> -static void
> +void
>  restart_threads (struct thread_info *event_thread, inferior *inf)
>  {
>    INFRUN_SCOPED_DEBUG_START_END ("event_thread=%s, inf=%d",
> -				 event_thread->ptid.to_string ().c_str (),
> -				 inf != nullptr ? inf->num : -1);
> +				 (event_thread != nullptr
> +				  ? event_thread->ptid.to_string ().c_str ()
> +				  : "None"), inf != nullptr ? inf->num : -1);
>  
>    gdb_assert (!step_over_info_valid_p ());
>  
> diff --git a/gdb/infrun.h b/gdb/infrun.h
> index 0c7c55eabec..81d00f6da7e 100644
> --- a/gdb/infrun.h
> +++ b/gdb/infrun.h
> @@ -173,6 +173,15 @@ extern void nullify_last_target_wait_ptid ();
>     all threads of all inferiors.  */
>  extern void stop_all_threads (const char *reason, inferior *inf = nullptr);
>  
> +/* Restart threads back to what they were trying to do back when we
> +   paused them (because of an in-line step-over or vfork, for example).
> +   The EVENT_THREAD thread, if non-nullptr, is ignored (not restarted).
> +
> +   If INF is non-nullptr, only resume threads from INF.  */
> +
> +extern void restart_threads (struct thread_info *event_thread,
> +			     inferior *inf = nullptr);
> +
>  extern void prepare_for_detach (void);
>  
>  extern void fetch_inferior_event ();
> diff --git a/gdb/testsuite/gdb.base/gcore-nonstop.c b/gdb/testsuite/gdb.base/gcore-nonstop.c
> new file mode 100644
> index 00000000000..191a1a26849
> --- /dev/null
> +++ b/gdb/testsuite/gdb.base/gcore-nonstop.c
> @@ -0,0 +1,44 @@
> +/* This testcase is part of GDB, the GNU debugger.
> +
> +   Copyright 2022 Free Software Foundation, Inc.
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   This program is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include <pthread.h>
> +
> +static pthread_barrier_t barrier;
> +
> +static void *
> +worker_func (void *)
> +{
> +  pthread_barrier_wait (&barrier);
> +  return NULL;
> +}
> +
> +int
> +main (void)
> +{
> +  pthread_t worker_thread;
> +  pthread_barrier_init (&barrier, NULL, 2);
> +
> +  pthread_create (&worker_thread, NULL, worker_func, NULL);
> +
> +  /* Break here.  */
> +
> +  pthread_barrier_wait (&barrier);
> +  pthread_join (worker_thread, NULL);
> +  pthread_barrier_destroy (&barrier);
> +
> +  return 0;
> +}
> diff --git a/gdb/testsuite/gdb.base/gcore-nonstop.exp b/gdb/testsuite/gdb.base/gcore-nonstop.exp
> new file mode 100644
> index 00000000000..6c9ed4ad342
> --- /dev/null
> +++ b/gdb/testsuite/gdb.base/gcore-nonstop.exp
> @@ -0,0 +1,62 @@
> +# Copyright 2022 Free Software Foundation, Inc.
> +
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +
> +# This testcase checks that when in non-stop mode with some threads running
> +# the gcore command can interrupt all threads, generate a core dump and
> +# restart threads as required.
> +
> +standard_testfile
> +
> +if { [prepare_for_testing "failed to prepare" \
> +	${testfile} ${srcfile} {threads debug}] } {
> +    return
> +}
> +
> +gdb_test_no_output "set non-stop on"
> +set lineno [gdb_get_line_number "Break here"]
> +if { ![runto $lineno] } {
> +    return
> +}
> +
> +# We should be stopped in thread 1 while thread 2 is running

Please add missing end period.

> +gdb_test_sequence "info threads" "info threads" {
> +    {Id\s+Target Id\s+Frame}
> +    {\*\s+1[^\n]*\n}
> +    {\s+2\s+[^\n]*\(running\)[^\n]*\n}
> +}
> +
> +set corefile [standard_output_file "corefile"]
> +if {![gdb_gcore_cmd $corefile "generate corefile"]} {
> +  # gdb_gcore_cmd did would generate a unsupported.

"did would" does not parse.

> +  return
> +}
> +
> +# After the corefile is generated, thread 2 should be back running
> +# and thread 1 should still be selectd

selectd -> selected

Also, missing period.

> +gdb_test_sequence "info threads" "correct thread selection after gcore" {
> +    {Id\s+Target Id\s+Frame}
> +    {\*\s+1[^\n]*\n}
> +    {\s+2\s+[^\n]*\(running\)[^\n]*\n}

Might be good to also check that thread 1 is still stopped, and stopped
where it was stopped before.

> +}
> +
> +clean_restart $binfile
> +gdb_test "core-file $corefile" "Core was generated by.*" "load corefile"
> +
> +# The corefile has the 2 threads

Missing period.

> +gdb_test_sequence "info threads" "threads in corefile" {
> +    {Id\s+Target Id\s+Frame}
> +    {\s+1\s+Thread[^\n]*\n}
> +    {\s+2\s+Thread[^\n]*\n}
> +}
> 
> base-commit: a13886e2198beb78b81c59839043b021ce6df78a
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] gdb/gcore: interrupt all threads before generating the corefile
  2022-10-11 18:44 ` Pedro Alves
@ 2022-10-12 17:12   ` Lancelot SIX
  0 siblings, 0 replies; 4+ messages in thread
From: Lancelot SIX @ 2022-10-12 17:12 UTC (permalink / raw)
  To: Pedro Alves, gdb-patches; +Cc: lsix

Hi,

Thanks for the review!

On 11/10/2022 19:44, Pedro Alves wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
> 
> 
> On 2022-10-06 10:50 a.m., Lancelot SIX via Gdb-patches wrote:
>> In non-stop mode, if the user tries to generate a core dump (using the
>> gcore command) while some threads are running, a non-helpful error
>> message is shown.
>>
>> Lets consider the following session as an example (debugging the test
>> program included in this patch):
>>
>>      (gdb) set non-stop on
>>      (gdb) b 37
>>      (gdb) r
>>      Thread 1 "gcore-nonstop" hit Breakpoint 1, main () at gcore-nonstop.c:39
>>      (gdb) info thread
>>         Id   Target Id                                          Frame
>>       * 1    Thread 0x7ffff7d7a740 (LWP 431838) "gcore-nonstop" main () * at gcore-nonstop.c:39
> 
> Weird " * " after "main ()".

My bad.  I fixed this locally.

> 
>>         2    Thread 0x7ffff7d79640 (LWP 431841) "gcore-nonstop" (running)
>>      (gdb) gcore
>>      Couldn't get registers: No such process.
>>
>> The reported error ("No such process") does not help the user understand
>> what happens.  This is due to the fact that we cannot access the
>> registers of a running thread.  Even if we ignore this error, generating
>> a core dump while any thread might update memory would most likely
>> result in a core file with an inconsistent view of the process' memory.
>>
>> To solve this, this patch proposes to change the gcore command so it
>> first stops all running threads before generating the corefile, and then
>> resumes them in their previous state.
>>
>> The patch proposes to stop all threads across all the inferiors (not
>> just the current one) just in case the memory space is shared between
>> inferiors.
> 
> The memory space may be shared with processes we're not debugging, too, though.

This is true.

> 
> Seems odd to stop threads running on other targets, if we can easily help it.
> E.g., you have 10 inferiors loaded, all running on different remote targets.
> Stopping all inferiors means we stop the inferiors running on all those different
> remote targets.
> 

I have updated my patch to only stop threads for the current inferior. 
Considering that some part of the address space might be shared with 
process outside of GDB's control, it does seem to me that this the best 
we can do.

>>
>> To achieve this, this patch exposes the restart_threads function in infrun.h
>> (used to be local to infrun.c).  We also allow the first parameter
>> (event_thread) to be nullptr as it is possible that the gcore command is
>> called while all threads are running, in which case we want all threads
>> to be restarted at the end of the procedure.
>>
>> Tested on x86_64.
>> ---
>>   gdb/gcore.c                              |  8 +++
>>   gdb/infrun.c                             | 16 ++----
>>   gdb/infrun.h                             |  9 ++++
>>   gdb/testsuite/gdb.base/gcore-nonstop.c   | 44 +++++++++++++++++
>>   gdb/testsuite/gdb.base/gcore-nonstop.exp | 62 ++++++++++++++++++++++++
>>   5 files changed, 128 insertions(+), 11 deletions(-)
>>   create mode 100644 gdb/testsuite/gdb.base/gcore-nonstop.c
>>   create mode 100644 gdb/testsuite/gdb.base/gcore-nonstop.exp
>>
>> diff --git a/gdb/gcore.c b/gdb/gcore.c
>> index 519007714e5..664318b4161 100644
>> --- a/gdb/gcore.c
>> +++ b/gdb/gcore.c
>> @@ -34,6 +34,7 @@
>>   #include "regset.h"
>>   #include "gdb_bfd.h"
>>   #include "readline/tilde.h"
>> +#include "infrun.h"
>>   #include <algorithm>
>>   #include "gdbsupport/gdb_unlinker.h"
>>   #include "gdbsupport/byte-vector.h"
>> @@ -131,6 +132,10 @@ gcore_command (const char *args, int from_tty)
>>     if (!target_has_execution ())
>>       noprocess ();
>>
>> +  scoped_restore_current_thread restore_current_thread;
>> +  scoped_disable_commit_resumed disable_commit_resume ("generating coredump");
>> +  stop_all_threads ("generating coredump");
>> +
>>     if (args && *args)
>>       corefilename.reset (tilde_expand (args));
>>     else
>> @@ -161,6 +166,9 @@ gcore_command (const char *args, int from_tty)
>>       }
>>
>>     gdb_printf ("Saved corefile %s\n", corefilename.get ());
> 
> So if something goes wrong dumping core, and we throw an error, we end
> up with all threads internally stopped, while "info threads" will show
> all threads as "running".  That could be fixed with a scoped_finish_thread_state.
> 

Thanks for catching that!  I am now using a scoped_finish_thread_state 
in my local patch.

> (Alternatively we could instead just error out if a thread is running.)
> 
> If we go with the auto-stop, then this should be documented in the manual, and
> get a NEWS entry, IMHO.
> 

I think that if I can get an acceptable auto-stop implementation, I 
would prefer that.

>> +
>> +  restart_threads (nullptr, nullptr);
>> +  disable_commit_resume.reset_and_commit ();
>>   }
>>
>>   static enum bfd_architecture
>> diff --git a/gdb/infrun.c b/gdb/infrun.c
>> index 1957e8020dd..34fcb2f92dd 100644
>> --- a/gdb/infrun.c
>> +++ b/gdb/infrun.c
>> @@ -96,9 +96,6 @@ static void resume (gdb_signal sig);
>>
>>   static void wait_for_inferior (inferior *inf);
>>
>> -static void restart_threads (struct thread_info *event_thread,
>> -                          inferior *inf = nullptr);
>> -
>>   static bool start_step_over (void);
>>
>>   static bool step_over_info_valid_p (void);
>> @@ -5889,18 +5886,15 @@ handle_inferior_event (struct execution_control_state *ecs)
>>       }
>>   }
>>
>> -/* Restart threads back to what they were trying to do back when we
>> -   paused them (because of an in-line step-over or vfork, for example).
>> -   The EVENT_THREAD thread is ignored (not restarted).
>> -
>> -   If INF is non-nullptr, only resume threads from INF.  */
>> +/* See infrun.h.  */
>>
>> -static void
>> +void
>>   restart_threads (struct thread_info *event_thread, inferior *inf)
>>   {
>>     INFRUN_SCOPED_DEBUG_START_END ("event_thread=%s, inf=%d",
>> -                              event_thread->ptid.to_string ().c_str (),
>> -                              inf != nullptr ? inf->num : -1);
>> +                              (event_thread != nullptr
>> +                               ? event_thread->ptid.to_string ().c_str ()
>> +                               : "None"), inf != nullptr ? inf->num : -1);
>>
>>     gdb_assert (!step_over_info_valid_p ());
>>
>> diff --git a/gdb/infrun.h b/gdb/infrun.h
>> index 0c7c55eabec..81d00f6da7e 100644
>> --- a/gdb/infrun.h
>> +++ b/gdb/infrun.h
>> @@ -173,6 +173,15 @@ extern void nullify_last_target_wait_ptid ();
>>      all threads of all inferiors.  */
>>   extern void stop_all_threads (const char *reason, inferior *inf = nullptr);
>>
>> +/* Restart threads back to what they were trying to do back when we
>> +   paused them (because of an in-line step-over or vfork, for example).
>> +   The EVENT_THREAD thread, if non-nullptr, is ignored (not restarted).
>> +
>> +   If INF is non-nullptr, only resume threads from INF.  */
>> +
>> +extern void restart_threads (struct thread_info *event_thread,
>> +                          inferior *inf = nullptr);
>> +
>>   extern void prepare_for_detach (void);
>>
>>   extern void fetch_inferior_event ();
>> diff --git a/gdb/testsuite/gdb.base/gcore-nonstop.c b/gdb/testsuite/gdb.base/gcore-nonstop.c
>> new file mode 100644
>> index 00000000000..191a1a26849
>> --- /dev/null
>> +++ b/gdb/testsuite/gdb.base/gcore-nonstop.c
>> @@ -0,0 +1,44 @@
>> +/* This testcase is part of GDB, the GNU debugger.
>> +
>> +   Copyright 2022 Free Software Foundation, Inc.
>> +
>> +   This program is free software; you can redistribute it and/or modify
>> +   it under the terms of the GNU General Public License as published by
>> +   the Free Software Foundation; either version 3 of the License, or
>> +   (at your option) any later version.
>> +
>> +   This program is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +   GNU General Public License for more details.
>> +
>> +   You should have received a copy of the GNU General Public License
>> +   along with this program.  If not, see <https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.gnu.org%2Flicenses%2F&amp;data=05%7C01%7Clancelot.six%40amd.com%7C32f863601cc64e79f5ca08daabb8b3a4%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638011107356491655%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C2000%7C%7C%7C&amp;sdata=K%2BrRGL3r6215qsEYGszy%2BEpO1Jg3F5K8EAcWVKHdkYo%3D&amp;reserved=0>.  */
>> +
>> +#include <pthread.h>
>> +
>> +static pthread_barrier_t barrier;
>> +
>> +static void *
>> +worker_func (void *)
>> +{
>> +  pthread_barrier_wait (&barrier);
>> +  return NULL;
>> +}
>> +
>> +int
>> +main (void)
>> +{
>> +  pthread_t worker_thread;
>> +  pthread_barrier_init (&barrier, NULL, 2);
>> +
>> +  pthread_create (&worker_thread, NULL, worker_func, NULL);
>> +
>> +  /* Break here.  */
>> +
>> +  pthread_barrier_wait (&barrier);
>> +  pthread_join (worker_thread, NULL);
>> +  pthread_barrier_destroy (&barrier);
>> +
>> +  return 0;
>> +}
>> diff --git a/gdb/testsuite/gdb.base/gcore-nonstop.exp b/gdb/testsuite/gdb.base/gcore-nonstop.exp
>> new file mode 100644
>> index 00000000000..6c9ed4ad342
>> --- /dev/null
>> +++ b/gdb/testsuite/gdb.base/gcore-nonstop.exp
>> @@ -0,0 +1,62 @@
>> +# Copyright 2022 Free Software Foundation, Inc.
>> +
>> +# This program is free software; you can redistribute it and/or modify
>> +# it under the terms of the GNU General Public License as published by
>> +# the Free Software Foundation; either version 3 of the License, or
>> +# (at your option) any later version.
>> +#
>> +# This program is distributed in the hope that it will be useful,
>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +# GNU General Public License for more details.
>> +#
>> +# You should have received a copy of the GNU General Public License
>> +# along with this program.  If not, see <https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.gnu.org%2Flicenses%2F&amp;data=05%7C01%7Clancelot.six%40amd.com%7C32f863601cc64e79f5ca08daabb8b3a4%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638011107356491655%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C2000%7C%7C%7C&amp;sdata=K%2BrRGL3r6215qsEYGszy%2BEpO1Jg3F5K8EAcWVKHdkYo%3D&amp;reserved=0>.
>> +
>> +# This testcase checks that when in non-stop mode with some threads running
>> +# the gcore command can interrupt all threads, generate a core dump and
>> +# restart threads as required.
>> +
>> +standard_testfile
>> +
>> +if { [prepare_for_testing "failed to prepare" \
>> +     ${testfile} ${srcfile} {threads debug}] } {
>> +    return
>> +}
>> +
>> +gdb_test_no_output "set non-stop on"
>> +set lineno [gdb_get_line_number "Break here"]
>> +if { ![runto $lineno] } {
>> +    return
>> +}
>> +
>> +# We should be stopped in thread 1 while thread 2 is running
> 
> Please add missing end period.

Done locally.

> 
>> +gdb_test_sequence "info threads" "info threads" {
>> +    {Id\s+Target Id\s+Frame}
>> +    {\*\s+1[^\n]*\n}
>> +    {\s+2\s+[^\n]*\(running\)[^\n]*\n}
>> +}
>> +
>> +set corefile [standard_output_file "corefile"]
>> +if {![gdb_gcore_cmd $corefile "generate corefile"]} {
>> +  # gdb_gcore_cmd did would generate a unsupported.
> 
> "did would" does not parse.
> 
>> +  return
>> +}
>> +
>> +# After the corefile is generated, thread 2 should be back running
>> +# and thread 1 should still be selectd
> 
> selectd -> selected
> 
> Also, missing period.

Done.

> 
>> +gdb_test_sequence "info threads" "correct thread selection after gcore" {
>> +    {Id\s+Target Id\s+Frame}
>> +    {\*\s+1[^\n]*\n}
>> +    {\s+2\s+[^\n]*\(running\)[^\n]*\n}
> 
> Might be good to also check that thread 1 is still stopped, and stopped
> where it was stopped before.
> 
>> +}
>> +
>> +clean_restart $binfile
>> +gdb_test "core-file $corefile" "Core was generated by.*" "load corefile"
>> +
>> +# The corefile has the 2 threads
> 
> Missing period.
> 
>> +gdb_test_sequence "info threads" "threads in corefile" {
>> +    {Id\s+Target Id\s+Frame}
>> +    {\s+1\s+Thread[^\n]*\n}
>> +    {\s+2\s+Thread[^\n]*\n}
>> +}
>>
>> base-commit: a13886e2198beb78b81c59839043b021ce6df78a
>>
> 

I'll prepare a V2 to send later.

Thanks,
Lancelot.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-10-12 17:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-06  9:50 [PATCH] gdb/gcore: interrupt all threads before generating the corefile Lancelot SIX
2022-10-06 16:31 ` John Baldwin
2022-10-11 18:44 ` Pedro Alves
2022-10-12 17:12   ` Lancelot SIX

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).