This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] 10/10 non-stop for linux native


This adds non-stop support for linux native.

The changes are:

- ptracing a running thread doesn't work.

 This implies that we must ensure that the proc_services
 usage in linux-thread-db.c talks to a pid of a stopped lwp.

 Checking if a thread is alive with ptrace doesn't work
 for running threads.  Worse, ptrace errors out claiming
 the thread doesn't exist.

- We must not stop all threads, obviously.

- We must mark threads as running if we're resuming
 them behind the core's back.

- Implement target_stop_ptid to interrupt only one thread.

-- 
Pedro Alves
2008-05-06  Pedro Alves  <pedro@codesourcery.com>

	* linux-fork.c (linux_fork_killall): Use SIGKILL instead of PTRACE_KILL.
	
	* linux-nat.c (sigint_clear_callback): New.
	(linux_nat_resume): In non-stop mode, only touch the passed in
	ptid.  Clear the sigint flag.
	(linux_handle_extended_wait): On a clone event, add new lwp to
	GDB's thread table, and mark as running, executing and stopped
	appropriately.
	(linux_nat_wait): In non-stop mode, don't stop all lwps, unless
	sync_execution.
	(kill_callback): If lwp is not stopped, use SIGKILL.
	(linux_nat_thread_alive): Use signal 0 to detect if
	thread is alive.
	(send_sigint_callback): New.
	(linux_nat_stop): New.
	(linux_nat_stop_ptid): New.
	(linux_nat_add_target): Set to_stop and to_stop_ptid.

	* linux-nat.h (struct lwp_info): Add sigint field.

	* linux-thread-db.c (thread_from_lwp, enable_thread_event)
	(check_event): Set proc_handle.pid to the stopped lwp.
	(thread_db_find_new_threads): If current inferior is executing,
	don't try to read from it.

---
 gdb/linux-fork.c      |    4 -
 gdb/linux-nat.c       |  185 ++++++++++++++++++++++++++++++++++++++------------
 gdb/linux-nat.h       |    4 +
 gdb/linux-thread-db.c |   13 +++
 4 files changed, 164 insertions(+), 42 deletions(-)

Index: src/gdb/linux-fork.c
===================================================================
--- src.orig/gdb/linux-fork.c	2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-fork.c	2008-05-06 16:24:05.000000000 +0100
@@ -337,7 +337,9 @@ linux_fork_killall (void)
     {
       pid = PIDGET (fp->ptid);
       do {
-	ptrace (PT_KILL, pid, 0, 0);
+	/* Use SIGKILL instead of PTRACE_KILL because the former works even
+	   if the thread is running, while the latter doesn't.  */
+	kill (pid, SIGKILL);
 	ret = waitpid (pid, &status, 0);
 	/* We might get a SIGCHLD instead of an exit status.  This is
 	 aggravated by the first kill above - a child has just
Index: src/gdb/linux-nat.c
===================================================================
--- src.orig/gdb/linux-nat.c	2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-nat.c	2008-05-06 16:43:18.000000000 +0100
@@ -212,6 +212,8 @@ static void linux_nat_async (void (*call
 static int linux_nat_async_mask (int mask);
 static int kill_lwp (int lwpid, int signo);
 
+static int send_sigint_callback (struct lwp_info *lp, void *data);
+
 /* Captures the result of a successful waitpid call, along with the
    options used in that call.  */
 struct waitpid_result
@@ -1466,6 +1468,13 @@ resume_set_callback (struct lwp_info *lp
   return 0;
 }
 
+static int
+sigint_clear_callback (struct lwp_info *lp, void *data)
+{
+  lp->sigint = 0;
+  return 0;
+}
+
 static void
 linux_nat_resume (ptid_t ptid, int step, enum target_signal signo)
 {
@@ -1489,10 +1498,17 @@ linux_nat_resume (ptid_t ptid, int step,
   /* A specific PTID means `step only this process id'.  */
   resume_all = (PIDGET (ptid) == -1);
 
-  if (resume_all)
-    iterate_over_lwps (resume_set_callback, NULL);
-  else
-    iterate_over_lwps (resume_clear_callback, NULL);
+  if (non_stop && resume_all)
+    internal_error (__FILE__, __LINE__,
+		    "can't resume all in non-stop mode");
+
+  if (!non_stop)
+    {
+      if (resume_all)
+	iterate_over_lwps (resume_set_callback, NULL);
+      else
+	iterate_over_lwps (resume_clear_callback, NULL);
+    }
 
   /* If PID is -1, it's the current inferior that should be
      handled specially.  */
@@ -1502,6 +1518,7 @@ linux_nat_resume (ptid_t ptid, int step,
   lp = find_lwp_pid (ptid);
   gdb_assert (lp != NULL);
 
+  /* Convert to something the lower layer understands.  */
   ptid = pid_to_ptid (GET_LWP (lp->ptid));
 
   /* Remember if we're stepping.  */
@@ -1515,6 +1532,9 @@ linux_nat_resume (ptid_t ptid, int step,
 			"LLAL: setting resumed (%d) %s\n",
 			__LINE__, target_pid_to_str (lp->ptid));
 
+  /* Remove the SIGINT mark.  Used in non-stop mode.  */
+  lp->sigint = 0;
+
   /* If we have a pending wait status for this thread, there is no
      point in resuming the process.  But first make sure that
      linux_nat_wait won't preemptively handle the event - we
@@ -1657,6 +1677,8 @@ linux_handle_extended_wait (struct lwp_i
 	ourstatus->kind = TARGET_WAITKIND_VFORKED;
       else
 	{
+	  struct cleanup *old_chain;
+
 	  ourstatus->kind = TARGET_WAITKIND_IGNORE;
 	  new_lp = add_lwp (BUILD_LWP (new_pid, GET_PID (inferior_ptid)));
 	  new_lp->cloned = 1;
@@ -1676,10 +1698,29 @@ linux_handle_extended_wait (struct lwp_i
 	  else
 	    status = 0;
 
+	  /* Make thread_db aware of this thread.  We do this
+	     early, because we need to mark the new thread as running.
+	     thread_db needs a stopped inferior_ptid.  We know LP is
+	     stopped, so use it this time.  */
+	  old_chain = save_inferior_ptid ();
+	  inferior_ptid = lp->ptid;
+	  lp->stopped = 1;
+	  target_find_new_threads ();
+	  do_cleanups (old_chain);
+	  if (!in_thread_list (new_lp->ptid))
+	    {
+	      /* We're not using thread_db.  Attach and add it to
+		 GDB's list.  */
+	      lin_lwp_attach_lwp (new_lp->ptid);
+	      target_post_attach (GET_LWP (new_lp->ptid));
+	      add_thread (new_lp->ptid);
+	    }
+
 	  if (stopping)
 	    new_lp->stopped = 1;
 	  else
 	    {
+ 	      new_lp->stopped = 0;
 	      new_lp->resumed = 1;
 	      if (debug_linux_nat)
 		fprintf_unfiltered (gdb_stdlog,
@@ -1687,12 +1728,15 @@ linux_handle_extended_wait (struct lwp_i
 				    __LINE__, target_pid_to_str (new_lp->ptid));
 	      ptrace (PTRACE_CONT, lp->waitstatus.value.related_pid, 0,
 		      status ? WSTOPSIG (status) : 0);
+	      set_running (new_lp->ptid, 1);
+	      set_executing (new_lp->ptid, 1);
 	    }
 
 	  if (debug_linux_nat)
 	    fprintf_unfiltered (gdb_stdlog,
 				"LHEW: Got clone event from LWP %ld, resuming\n",
 				GET_LWP (lp->ptid));
+	  lp->stopped = 0;
 	  ptrace (PTRACE_CONT, GET_LWP (lp->ptid), 0, 0);
 
 	  return 1;
@@ -2412,13 +2456,8 @@ linux_nat_filter_event (int lwpid, int s
 	 not the end of the debugged application and should be
 	 ignored.  */
       if (num_lwps > 0)
-	{
-	  /* Make sure there is at least one thread running.  */
-	  gdb_assert (iterate_over_lwps (running_callback, NULL));
-
-	  /* Discard the event.  */
-	  return NULL;
-	}
+	/* Discard the event.  */
+	return NULL;
     }
 
   /* Check if the current LWP has previously exited.  In the nptl
@@ -2552,6 +2591,7 @@ linux_nat_wait (ptid_t ptid, struct targ
 			    __LINE__, target_pid_to_str (lp->ptid));
       /* Add the main thread to GDB's thread list.  */
       add_thread_silent (lp->ptid);
+      set_running (lp->ptid, 1);
     }
 
   sigemptyset (&flush_mask);
@@ -2798,19 +2838,38 @@ retry:
     fprintf_unfiltered (gdb_stdlog, "LLW: Candidate event %s in %s.\n",
 			status_to_str (status), target_pid_to_str (lp->ptid));
 
-  /* Now stop all other LWP's ...  */
-  iterate_over_lwps (stop_callback, NULL);
-
-  /* ... and wait until all of them have reported back that they're no
-     longer running.  */
-  iterate_over_lwps (stop_wait_callback, &flush_mask);
-  iterate_over_lwps (flush_callback, &flush_mask);
-
-  /* If we're not waiting for a specific LWP, choose an event LWP from
-     among those that have had events.  Giving equal priority to all
-     LWPs that have had events helps prevent starvation.  */
-  if (pid == -1)
-    select_event_lwp (&lp, &status);
+  /* When threads are created with CLONE_THREAD, SIGINT is only sent
+     to one thread in the thread group.  Send the signal to all the
+     other running threads too.  An obvious possible enhancement would
+     be to detect clones we're debugging that haven't been started
+     with CLONE_THREAD, and hence will receive the signal
+     automatically.  */
+  if (non_stop
+      && target_can_async_p ()
+      && sync_execution
+      && WIFSTOPPED (status) && WSTOPSIG (status) == SIGINT)
+    {
+      lp->sigint = 1; /* This one has already seen SIGINT.  */
+      iterate_over_lwps (send_sigint_callback, NULL);
+    }
+
+  if (!non_stop)
+    {
+      /* Now stop all other LWP's ...  */
+      iterate_over_lwps (stop_callback, NULL);
+
+      /* ... and wait until all of them have reported back that
+	 they're no longer running.  */
+      iterate_over_lwps (stop_wait_callback, &flush_mask);
+      iterate_over_lwps (flush_callback, &flush_mask);
+
+      /* If we're not waiting for a specific LWP, choose an event LWP
+	 from among those that have had events.  Giving equal priority
+	 to all LWPs that have had events helps prevent
+	 starvation.  */
+      if (pid == -1)
+	select_event_lwp (&lp, &status);
+    }
 
   /* Now that we've selected our final event LWP, cancel any
      breakpoints in other LWPs that have hit a GDB breakpoint.  See
@@ -2850,13 +2909,25 @@ static int
 kill_callback (struct lwp_info *lp, void *data)
 {
   errno = 0;
-  ptrace (PTRACE_KILL, GET_LWP (lp->ptid), 0, 0);
-  if (debug_linux_nat)
-    fprintf_unfiltered (gdb_stdlog,
-			"KC:  PTRACE_KILL %s, 0, 0 (%s)\n",
-			target_pid_to_str (lp->ptid),
-			errno ? safe_strerror (errno) : "OK");
-
+  /* PTRACE_KILL doesn't work when the thread is running.  */
+  if (!lp->stopped)
+    {
+      kill_lwp (GET_LWP (lp->ptid), SIGKILL);
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "KC:  kill_lwp (SIGKILL) %s (%s)\n",
+			    target_pid_to_str (lp->ptid),
+			    errno ? safe_strerror (errno) : "OK");
+    }
+  else
+    {
+      ptrace (PTRACE_KILL, GET_LWP (lp->ptid), 0, 0);
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "KC:  PTRACE_KILL %s, 0, 0 (%s)\n",
+			    target_pid_to_str (lp->ptid),
+			    errno ? safe_strerror (errno) : "OK");
+    }
   return 0;
 }
 
@@ -2999,22 +3070,22 @@ linux_nat_xfer_partial (struct target_op
 static int
 linux_nat_thread_alive (ptid_t ptid)
 {
+  int err;
+
   gdb_assert (is_lwp (ptid));
 
-  errno = 0;
-  ptrace (PTRACE_PEEKUSER, GET_LWP (ptid), 0, 0);
+  /* Send signal 0 instead of anything ptrace, because ptracing a
+     running thread errors out claiming that the thread doesn't
+     exist.  */
+  err = kill_lwp (GET_LWP (ptid), 0);
+
   if (debug_linux_nat)
     fprintf_unfiltered (gdb_stdlog,
-			"LLTA: PTRACE_PEEKUSER %s, 0, 0 (%s)\n",
+			"LLTA: KILL(SIG0) %s (%s)\n",
 			target_pid_to_str (ptid),
-			errno ? safe_strerror (errno) : "OK");
+			err ? safe_strerror (err) : "OK");
 
-  /* Not every Linux kernel implements PTRACE_PEEKUSER.  But we can
-     handle that case gracefully since ptrace will first do a lookup
-     for the process based upon the passed-in pid.  If that fails we
-     will get either -ESRCH or -EPERM, otherwise the child exists and
-     is alive.  */
-  if (errno == ESRCH || errno == EPERM)
+  if (err != 0)
     return 0;
 
   return 1;
@@ -4174,6 +4245,35 @@ linux_nat_set_async_mode (int on)
   linux_nat_async_enabled = on;
 }
 
+static int
+send_sigint_callback (struct lwp_info *lp, void *data)
+{
+  if (!lp->stopped && !lp->sigint)
+    {
+      kill_lwp (GET_LWP (lp->ptid), SIGINT);
+      lp->sigint = 1;
+    }
+  return 0;
+}
+
+static void
+linux_nat_stop (void)
+{
+  if (non_stop)
+    iterate_over_lwps (send_sigint_callback, NULL);
+  else
+    linux_ops->to_stop ();
+}
+
+static void
+linux_nat_stop_ptid (ptid_t ptid)
+{
+  if (ptid_equal (ptid, minus_one_ptid))
+    iterate_over_lwps (send_sigint_callback, NULL);
+  else
+    kill_lwp (GET_LWP (ptid), SIGINT);
+}
+
 void
 linux_nat_add_target (struct target_ops *t)
 {
@@ -4204,6 +4304,9 @@ linux_nat_add_target (struct target_ops 
   t->to_terminal_inferior = linux_nat_terminal_inferior;
   t->to_terminal_ours = linux_nat_terminal_ours;
 
+  t->to_stop = linux_nat_stop;
+  t->to_stop_ptid = linux_nat_stop_ptid;
+
   /* We don't change the stratum; this target will sit at
      process_stratum and thread_db will set at thread_stratum.  This
      is a little strange, since this is a multi-threaded-capable
Index: src/gdb/linux-nat.h
===================================================================
--- src.orig/gdb/linux-nat.h	2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-nat.h	2008-05-06 16:24:05.000000000 +0100
@@ -37,6 +37,10 @@ struct lwp_info
      SIGCHLD.  */
   int cloned;
 
+  /* Non-zero if we sent this LWP a SIGINT (but the LWP didn't report
+     it back yet).  */
+  int sigint;
+
   /* Non-zero if we sent this LWP a SIGSTOP (but the LWP didn't report
      it back yet).  */
   int signalled;
Index: src/gdb/linux-thread-db.c
===================================================================
--- src.orig/gdb/linux-thread-db.c	2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-thread-db.c	2008-05-06 16:24:05.000000000 +0100
@@ -308,6 +308,8 @@ thread_from_lwp (ptid_t ptid)
      LWP.  */
   gdb_assert (GET_LWP (ptid) != 0);
 
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (ptid);
   err = td_ta_map_lwp2thr_p (thread_agent, GET_LWP (ptid), &th);
   if (err != TD_OK)
     error (_("Cannot find user-level thread for LWP %ld: %s"),
@@ -418,6 +420,9 @@ enable_thread_event (td_thragent_t *thre
   td_notify_t notify;
   td_err_e err;
 
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (inferior_ptid);
+
   /* Get the breakpoint address for thread EVENT.  */
   err = td_ta_event_addr_p (thread_agent, event, &notify);
   if (err != TD_OK)
@@ -761,6 +766,9 @@ check_event (ptid_t ptid)
   if (stop_pc != td_create_bp_addr && stop_pc != td_death_bp_addr)
     return;
 
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (ptid);
+
   /* If we are at a create breakpoint, we do not know what new lwp
      was created and cannot specifically locate the event message for it.
      We have to call td_ta_event_getmsg() to get
@@ -961,6 +969,11 @@ thread_db_find_new_threads (void)
 {
   td_err_e err;
 
+  if (in_thread_list (inferior_ptid) && is_executing (inferior_ptid))
+    return;
+
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (inferior_ptid);
   /* Iterate over all user-space threads to discover new threads.  */
   err = td_ta_thr_iter_p (thread_agent, find_new_threads_callback, NULL,
 			  TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]