This is the mail archive of the gdb-cvs@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[binutils-gdb] gdb.base/gdb-sigterm.exp: Fix spurious FAILs


https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=77f41761432a70930ea0a917a2f135b392af34f5

commit 77f41761432a70930ea0a917a2f135b392af34f5
Author: Pedro Alves <palves@redhat.com>
Date:   Fri Feb 6 11:09:42 2015 +0100

    gdb.base/gdb-sigterm.exp: Fix spurious FAILs
    
    The buildbot shows that some machines FAIL this test frequently.
    E.g.: https://sourceware.org/ml/gdb-testers/2015-q1/msg00997.html
    
    If I stress my machine, I can sometimes see it fail too.
    
    Bumping the 200 limit and tweaking the test to show the step count, I
    get:
    
         ...
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 12 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 8 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 13 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 7 times
    -->  FAIL: gdb.base/gdb-sigterm.exp: SIGTERM stepped 228 times <--
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 11 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 13 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 12 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 8 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 9 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 7 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 11 times
         PASS: gdb.base/gdb-sigterm.exp: SIGTERM stepped 8 times
         ...
    
    Thinking that this might be a problem of SIGTERM reaching GDB, but
    then the event loop taking too long to handle it, I hacked GDB to
    print a debug log whenever the SIGTERM handler was called, and,
    whenever the event loop finally calls the async SIGTERM handler.
    Here's what I see:
    
         infrun:   30011 [Thread 30011],
         infrun:   status->kind = stopped, signal = GDB_SIGNAL_TRAP
         infrun: TARGET_WAITKIND_STOPPED
         infrun: stop_pc = 0x4005de
    -->  infrun: got SIGTERM                                       <--
         infrun: stepping inside range [0x4005de-0x4005e0]
         infrun: resume (step=1, signal=GDB_SIGNAL_0), ...
         infrun: prepare_to_wait
    -->  infrun: handling async SIGTERM                            <--
         Cannot execute this command while the target is running.
         Use the "interrupt" command to stop the target
         and then try again.
         gdb.base/gdb-sigterm.exp: expect eof #27
         FAIL: gdb.base/gdb-sigterm.exp: SIGTERM stepped 228 times
    
    So, no delay on the GDB side.  It just happens that occasionally it
    takes more than 200 single-steps before SIGTERM even reaches GDB.
    This just looks like a kernel/scheduling issue --- some extra usage
    spike in the system (e.g., an I/O spike) might cause it for me.  For
    the build slaves, I'm guessing they're frequently busy enough to trip
    on this often.  Particularly more so now that we're having them run
    tests in parallel mode.
    
    The fix is to detect failure by timeout instead of counting single
    steps.  This should be more reliable.  Indeed for me, after this
    commit, I couldn't trigger a FAIL anymore, even after letting the test
    run for an hour.
    
    By timeout is also nicer in that a board file for a slow host/target
    can increase it (like, e.g., an embedded GNU/Linux board).
    
    Tested on x86_64 Fedora 20, native, gdbserver, and extended-remote
    gdbserver.
    
    gdb/testsuite/
    2015-02-06  Pedro Alves  <palves@redhat.com>
    
    	* gdb.base/gdb-sigterm.c (main): Use the TIMEOUT define to
    	determine how many seconds to pass to 'alarm'.
    	* gdb.base/gdb-sigterm.exp (top level): Build program with
    	-DTIMEOUT=$timeout.
    	(do_test): Return success/failure indication.  Add more verbose
    	logging.  Don't fail if 200 single steps are seen.  Instead, fail
    	when the test times out.
    	(passes): New global.
    	(top level): Break the testing loop if testing fails on any
    	iteration.  Use gdb_assert.

Diff:
---
 gdb/testsuite/ChangeLog                | 13 ++++++++++++
 gdb/testsuite/gdb.base/gdb-sigterm.c   |  6 +++++-
 gdb/testsuite/gdb.base/gdb-sigterm.exp | 39 +++++++++++++++++++++-------------
 3 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog
index 6fdaa89..5648511 100644
--- a/gdb/testsuite/ChangeLog
+++ b/gdb/testsuite/ChangeLog
@@ -1,3 +1,16 @@
+2015-02-06  Pedro Alves  <palves@redhat.com>
+
+	* gdb.base/gdb-sigterm.c (main): Use the TIMEOUT define to
+	determine how many seconds to pass to 'alarm'.
+	* gdb.base/gdb-sigterm.exp (top level): Build program with
+	-DTIMEOUT=$timeout.
+	(do_test): Return success/failure indication.  Add more verbose
+	logging.  Don't fail if 200 single steps are seen.  Instead, fail
+	when the test times out.
+	(passes): New global.
+	(top level): Break the testing loop if testing fails on any
+	iteration.  Use gdb_assert.
+
 2015-02-04  Don Breazeal  <donb@codesourcery.com>
 
 	* gdb.base/info-os.c (shmid, semid, msqid): Make variables static
diff --git a/gdb/testsuite/gdb.base/gdb-sigterm.c b/gdb/testsuite/gdb.base/gdb-sigterm.c
index ed31280..0bc8c1c 100644
--- a/gdb/testsuite/gdb.base/gdb-sigterm.c
+++ b/gdb/testsuite/gdb.base/gdb-sigterm.c
@@ -20,7 +20,11 @@
 int
 main (void)
 {
-  alarm (60);
+  /* Allow for as much timeout as DejaGnu wants, plus a bit of
+     slack.  */
+  unsigned int seconds = TIMEOUT + 20;
+
+  alarm (seconds);
 
   for (;;); /* loop-line */
 }
diff --git a/gdb/testsuite/gdb.base/gdb-sigterm.exp b/gdb/testsuite/gdb.base/gdb-sigterm.exp
index 12eaf24..95d7ec3 100644
--- a/gdb/testsuite/gdb.base/gdb-sigterm.exp
+++ b/gdb/testsuite/gdb.base/gdb-sigterm.exp
@@ -17,10 +17,16 @@
 
 standard_testfile
 
-if { [build_executable ${testfile}.exp ${testfile}] == -1 } {
+# The test program exits after a while, in case GDB crashes.  Make it
+# wait at least as long as we may wait before declaring a time out
+# failure.
+set options { "additional_flags=-DTIMEOUT=$timeout" debug }
+if { [build_executable ${testfile}.exp ${testfile} $srcfile $options] == -1 } {
     return -1
 }
 
+# Return 0 on success, non-zero otherwise.
+
 proc do_test { pass } {
     global testfile gdb_prompt binfile pf_prefix
 
@@ -42,12 +48,13 @@ proc do_test { pass } {
     gdb_test_multiple "step" $test {
 	-re "infrun: stepping inside range" {
 	    # Suppress pass $test
-	    verbose -log "$pf_prefix $test"
+	    verbose -log "$pf_prefix $test: ran"
 	    set abort 0
 	}
     }
     if $abort {
-	return
+	verbose -log "$pf_prefix $test: did not run"
+	return $abort
     }
 
     set gdb_pid [exp_pid -i [board_info host fileid]]
@@ -56,30 +63,32 @@ proc do_test { pass } {
     set test "expect eof #$pass"
     set abort 1
     set stepping 0
+    # If GDB mishandles the SIGTERM and doesn't exit, this should FAIL
+    # with timeout.  We don't expect a GDB prompt, so we see one,
+    # we'll FAIL too.
     gdb_test_multiple "" $test {
 	eof {
-	    verbose -log "$pf_prefix $test"
+	    verbose -log "$pf_prefix $test: got eof"
 	    set abort 0
 	}
 	-re "infrun: stepping inside range" {
 	    incr stepping
-	    if { $stepping > 200 } {
-		fail "$test (stepping inside range $stepping times)"
-	    } else {
-		exp_continue
-	    }
+	    exp_continue
 	}
     }
-    if $abort {
-	return
-    }
+    verbose -log "$pf_prefix $test: stepped $stepping times"
+    return $abort
 }
 
 # Testcase was FAILing approx. on 10th pass with unpatched GDB.
 # 50 runs should be approx. a safe number to be sure it is fixed now.
+set passes 50
 
-for {set pass 0} {$pass < 50} {incr pass} {
+for {set pass 0} {$pass < $passes} {incr pass} {
     clean_restart ${testfile}
-    do_test $pass
+    if { [do_test $pass] != 0 } {
+	break
+    }
 }
-pass "$pass SIGTERM passes"
+
+gdb_assert {$pass == $passes} "$passes SIGTERM passes"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]