How often do processes wake up due to timeouts?

Problem

(Transcribed from http://dev.laptop.org/ticket/110#comment:4)

Based on http://blogs.gnome.org/view/ryanl/2006/09/17/0, this SystemTap script aims to reveal how often processes wake up as a result of setting timers for themselves. The intention is to make it extremely easy to spot poorly behaved applications.

Scripts

pstimeouts

# pstimeouts
# Copyright (C) 2006 Red Hat, Inc., Eugene Teo <eteo@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# Based on http://blogs.gnome.org/view/ryanl/2006/09/17/0
#
# Tested on:
# [eteo@kerndev ~]$ date
# Mon Oct 30 21:12:40 SGT 2006
# [eteo@kerndev ~]$ uname -a
# Linux kerndev.sin.redhat.com 2.6.18-1.2798.fc6 #1 SMP Mon Oct 16 14:37:32 EDT 2006 i686 i686 i386 GNU/Linux
#

uid=`id -u`; alluid=0; detail=0

while getopts ":auh" options; do
        case $options in
        a) alluid=1;;
        u) detail=1;;
        h|?) cat >&2 <<-EOF
pstimeouts [-au] [pids..]
  -a   show processes belonging to all users
  -u   detailed listing
EOF
           exit 1;;
        esac
done

/usr/bin/env stap -g -e '
global alluid, detail, uid
global process, trace_poll, trace_select, trace_epoll, total_timeouts
global poll_timeouts, epoll_timeouts, select_timeouts, itimer_timeouts, schedule_timeouts

/* FIXME: remove this if it is already committed into CVS */
function kernel_long:long(addr:long) %{ /* pure */
        THIS->__retvalue = deref (sizeof(long), (uintptr_t)(THIS->addr));
        goto success;
deref_fault: /* branched to from deref() */
        {
                static char errmsg[40];
                snprintf (errmsg, 40, "kernel long copy fault at 0x%p",
                        (void *) (uintptr_t) THIS->addr);
                CONTEXT->last_error = errmsg;
        }
success: ;
%}

probe begin {
        alluid = '$alluid'
        detail = '$detail'
        uid = '$uid'
}

probe kernel.statement("*@kernel/fork.c:175") {
        p = pid()
        poll_timeouts[p] = epoll_timeouts[p] = select_timeouts[p] = itimer_timeouts[p] = schedule_timeouts[p] = 0
}

probe kernel.function("do_sys_poll") {
        trace_poll[pid()] = 1
}

probe kernel.function("do_sys_poll").return {
        trace_poll[pid()] = 0
}

/* do_sys_poll@fs/select.c */
/* Could not access do_poll directly because of a gcc bug:
   http://sources.redhat.com/bugzilla/show_bug.cgi?id=1155 */
probe kernel.statement("*@fs/select.c:717") {
        /* assume !*timeout if count == 0,
           see fs/select.c:622 */
        if ($fdcount == 0)
                poll_timeouts[pid()]++
}

probe kernel.function("do_select") {
        trace_select[pid()] = 1
}

probe kernel.function("do_select").return {
        trace_select[pid()] = 0
}

/* do_select@fs/select.c */
probe kernel.statement("*@fs/select.c:289") {
        if (!kernel_long($timeout))
                select_timeouts[pid()]++
}

/* ep_poll@fs/eventpoll.c */
probe kernel.function("*@fs/eventpoll.c:1495") {
        trace_epoll[pid()] = 1
}

/* it_real_fn@kernel/itimer.c */
probe kernel.function("it_real_fn") {
        itimer_timeouts[pid()]++
}

/* schedule_timeout@kernel/timer.c */
probe kernel.function("schedule_timeout").return {
        p = pid()
        process[p, uid()] = execname()
        if ($return <= 0)
                schedule_timeouts[p]++
        if (trace_poll[p] && !$return)
                schedule_timeouts[p]--
        else if (trace_select[p] && !$return)
                schedule_timeouts[p]--
        else if (trace_epoll[p] && !$return) {
                epoll_timeouts[p]++
                schedule_timeouts[p]--
                trace_epoll[p] = 0
        }
}

probe kernel.function("do_exit") {
        p = pid() u = uid()
        if (process[p, u] == execname())
                delete process[p, u]
}

probe timer.ms(1000) {
        printf("\033[2J\033[1;1H") /* clear screen */
        if (detail) {
                printf ("  uid     pid |    poll   select    epoll   itimer    other| process\n");
                foreach ([p+, u] in process) {
                        if (!alluid && u != uid) continue
                        printf ("%5d %7d |%8d %8d %8d %8d %8d| %-.38s\n", u, p,
                                poll_timeouts[p], select_timeouts[p], 
                                epoll_timeouts[p], itimer_timeouts[p],
                                schedule_timeouts[p], process[p, u])
                }
        } else {
                printf ("  pid timeouts process\n");
                foreach ([p+, u] in process) {
                        if (!alluid && u != uid) continue
                        total_timeouts = poll_timeouts[p] + select_timeouts[p] +
                                epoll_timeouts[p] + itimer_timeouts[p] + schedule_timeouts[p]
                        printf ("%5d %8d %-.38s\n", p, total_timeouts, process[p, u])
                }
        }
}
'

Output

[eteo@kerndev ~]$ ./pstimeouts -h
pstimeouts [-au] [pids..]
  -a   show processes belonging to all users
  -u   detailed listing
[eteo@kerndev ~]$ ./pstimeouts
  pid timeouts process
 3485       20 gnome-power-man
 3530       16 scim-panel-gtk
 3532       42 scim-launcher
...
[eteo@kerndev ~]$ ./pstimeouts -a
  pid timeouts process
 2148        0 syslogd
 2311        4 pcscd
 2342        1 automount
...
[eteo@kerndev ~]$ ./pstimeouts -u
  uid     pid |    poll   select    epoll   itimer    other| process
  500    3463 |     116        0        0        0        0| gnome-settings-
  500    3467 |       0        0        0        0        0| scim-launcher
  500    3482 |     220        0        0        2        0| metacity
...
[eteo@kerndev ~]$ ./pstimeouts -ua
  uid     pid |    poll   select    epoll   itimer    other| process
    0    2311 |       0        6        0        0        0| pcscd
    0    2342 |       0        0        0        0        2| automount
   38    2404 |       0        0        0        0        0| ntpd
...

Lessons

SystemTap makes it so much easier to gather statistics from the kernel and display it at the same time.


WarStories

None: WSPSTimeouts (last edited 2008-01-10 19:47:34 by localhost)