This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Script to measure resource usage based on process arguments


I wrote the attached script in response to a customer question. They wanted to know if SystemTap could show them which set of java processes were consuming the most resources, based on the process arguments. They may have several thousand java instances running at one time and those instances can have very long argument strings. I'm not sure the script gives them exactly what they want, but I thought I'd send this out for comments anyway.

The script lets you specify process names and strings to look for in their arguments. It then aggregates the resources used by all processes with those names and argument strings. In this case, it tracks cpu time, but you could modify it to track any resource value that's available in task_struct. It prints the results at the end of each interval. For example, I ran the script as shown below during a kernel build.

# ./argtap -i 30 -n gcc -n as -a fs/ -a mm/ -a arch/ -a security/ -a crypto/ -a drivers/

======================= Mon Jan  8 15:59:05 2007 ========================
TIME(ms) #PROC PROCNAME ARG SEGMENT
1472 33 as drivers/
975 25 as fs/
91 35 gcc drivers/
69 27 gcc fs/


======================= Mon Jan  8 15:59:35 2007 ========================
TIME(ms) #PROC  PROCNAME         ARG SEGMENT
2174     55     as               fs/
683      17     as               drivers/
155      56     gcc              fs/
44       17     gcc              drivers/

The results show how many gcc and as processes with the specified strings in their arguments ran during a 30 second interval and how much total cpu time they used.

While writing this script I had to figure out how to get the arguments from existing processes. That took a while. Since the arguments are stored in each process's address space, we can't just access them whenever we want to. The usual routines used in /proc to grab another process's arguments won't work in systemtap because irqs are always disabled in probes, even begin probes, and we can't call functions that might sleep in systemtap. I eventually realized I could access the arguments when the process is "current" in the scheduler.cpu_off probe. This is okay because the script doesn't care about processes that don't get scheduled.

Note that there's a lot of embedded C in this script, which I don't really like, but I couldn't think of any other way to do what needed to be done. Also note I had to use a big MAXACTION value to get it to work in some cases. Given that limitation, I'm beginning to think systemtap scripts should be used more for extracting raw data and data processing should be left to user scripts and programs, especially where large amounts of data are involved.

Anyway, please look this over and give me your comments. I'm especially interested in ways I could improve it.

Mike Mason

#!/bin/bash

# Filter options. Each filter has a DELIM-separated value string and a count
# of how many times the corresponding option was given.
F_ARGSTR="";  F_ARGS=0  # Filter and group by process arguments
F_PIDSTR="";  F_PID=0   # Filter by process ID
F_NAMESTR=""; F_NAME=0  # Filter by process name (read as F_NAMESTR below)
F_UIDSTR="";  F_UID=0   # Filter by user
FILTER=0                # Any filters specified?
ERROR=0                 # Number of option errors seen so far

# Print options
P_INTERVAL=5 # default interval between output

# Separator used when building the filter value strings
DELIM=","

# Print the command-line synopsis and option summary to stdout.
# Reads P_INTERVAL so the help shows the current default interval.
function usage {
	echo "USAGE: argtap [-a argstr]... [-i interval] [-n pname]..."
	echo "              [-p pid]... [-u username]... [-h]"
	echo "    -a argstr    # argument string to look for (default: all)"
	echo "    -i interval  # interval in seconds between printing (default: $P_INTERVAL)"
	echo "    -n pname     # this process name only (default: all)"
	echo "    -p pid       # this process ID only (default: all)"
	echo "    -u username  # this user only (default: all)"
	echo "    -h           # display this help text"
	echo ""
}

# Process options.
# Repeatable options (-a, -n, -p, -u) prepend their value plus DELIM to the
# matching F_*STR string and bump the matching counter. Problems with an
# option value are reported and counted in ERROR rather than aborting at
# once, so every bad option is reported before the script exits.
while getopts a:i:n:p:u:h option; do
	case $option in
	a)      F_ARGS=$((F_ARGS + 1))
	        F_ARGSTR=$OPTARG$DELIM$F_ARGSTR ;;
	i)      # The interval is spliced into timer.s(), so it must be a
	        # positive decimal integer. 10# keeps leading zeros from
	        # being read as octal.
	        if [[ $OPTARG =~ ^[0-9]+$ ]] && (( 10#$OPTARG > 0 )); then
	            P_INTERVAL=$((10#$OPTARG))
	        else
	            echo "ERROR: Invalid interval: $OPTARG"
	            ERROR=$((ERROR + 1))
	        fi ;;
	n)      F_NAME=$((F_NAME + 1))
	        F_NAMESTR=$OPTARG$DELIM$F_NAMESTR ;;
	p)      F_PID=$((F_PID + 1))
	        F_PIDSTR=$OPTARG$DELIM$F_PIDSTR ;;
	u)      # Translate the user name to a numeric uid via /etc/passwd.
	        uid=`awk -F: -v name="$OPTARG" '$1 == name {print $3}' /etc/passwd`
	        if [[ -n $uid ]]; then
	            F_UID=$((F_UID + 1))
	            F_UIDSTR=$uid$DELIM$F_UIDSTR
	        else
	            echo "ERROR: Unknown user: $OPTARG"
	            ERROR=$((ERROR + 1))
	        fi ;;
	h|?|*)  usage
	        exit 1 ;;
	esac
done

# Bail out if any option value was rejected above.
# Arithmetic evaluation is used because ">" inside [[ ]] compares strings
# lexicographically rather than numerically.
if (( ERROR > 0 )); then
	exit 1
fi

# FILTER tells the stap script whether any filtering is needed at all.
if (( F_ARGS > 0 || F_NAME > 0 || F_PID > 0 || F_UID > 0 )); then
	FILTER=1
fi

#
# Pass a timezone adjustment value to the stap script
#
# The numeric UTC offset printed by date (+hhmm or -hhmm) is converted to a
# signed number of seconds. The offset is kept in TZ_OFFSET rather than TZ:
# TZ is the standard timezone environment variable, and overwriting it would
# change the timezone seen by any child process when TZ is exported.
#
TZ_OFFSET=`date "+%z"`
TZ_SIGN=${TZ_OFFSET:0:1}
TZ_HOURS=${TZ_OFFSET:1:2}
TZ_MINS=${TZ_OFFSET:3:2}
# 10# forces base 10 so that values such as 08 or 09 are not read as octal.
TZ_ADJUST=$TZ_SIGN$((10#$TZ_HOURS*60*60+10#$TZ_MINS*60))

# Start the systemtap script
#
# The whole stap script is passed with -e as one single-quoted shell string
# that ends at the closing quote after the timer probe. Shell variables are
# spliced in by briefly closing and reopening that quote, so no apostrophe
# may appear anywhere in the script text, comments included.
# -g is required for the embedded C sections (see the stap man page); the -D
# options define MAXACTION, MAXSTRINGLEN and MAXMAPENTRIES for the script.
stap -g -DMAXACTION=50000 -DMAXSTRINGLEN=1024 -DMAXMAPENTRIES=2048 -e '
%{
#include <linux/sched.h>
#include <linux/rcupdate.h>

/*
 * Look up the task_struct for a pid, or NULL if there is none.
 * The RCU read lock is held only around the lookup itself.
 * NOTE(review): nothing pins the returned task after the lock is dropped;
 * confirm callers can tolerate the task going away.
 */
struct task_struct *__pid2task(int pid)
{
	struct task_struct *task = NULL;

	rcu_read_lock();
	task = find_task_by_pid (pid);
	rcu_read_unlock();
	return(task);
}
%}


// Script-level wrapper: enter an RCU read-side critical section.
function rcu_read_lock() %{
	rcu_read_lock();
%}


// Script-level wrapper: leave the RCU read-side critical section.
function rcu_read_unlock() %{
	rcu_read_unlock();
%}


// Returns the address of the task that follows cur_task in the task list,
// using the next_task() kernel macro.
function next_task:long(cur_task:long)
%{
	struct task_struct *following;

	following = next_task((struct task_struct *)THIS->cur_task);
	THIS->__retvalue = (long) following;
%}


// Returns the address of init_task, used as the start and end marker
// when walking the task list.
function inittask:long()
%{
	struct task_struct *head = &init_task;

	THIS->__retvalue = (long) head;
%}


// Formats a long as its decimal string representation.
function ltostr:string (l:long)
{
	text = sprintf("%d", l)
	return text
}


// tokenize()
// Splits a string into tokens, one token per call.
//   input : a non-empty string starts a new tokenization; pass "" to
//           continue with the string given on the previous non-empty call
//   delim : set of delimiter characters, as understood by strsep()
// Returns the next token. When strsep() finds no further token, nothing is
// written to the return value.
// NOTE(review): the working copy and the scan position live in static
// storage shared by every caller, so two probes tokenizing at the same
// time on different CPUs would interfere with each other - confirm that
// callers cannot overlap.
function tokenize:string(input:string, delim:string)
%{
	static char str[MAXSTRINGLEN];   /* private copy that strsep() modifies */
	static char *str_start;          /* where the next call resumes scanning */
	char *token = NULL;

	/* A non-empty input restarts tokenization on a fresh copy. */
	if (THIS->input[0]) {
		strlcpy(str, THIS->input, MAXSTRINGLEN);
		str_start = &str[0];
	}
	token = strsep(&str_start, THIS->delim);
	if (token)
		strlcpy (THIS->__retvalue, token, MAXSTRINGLEN);
%}

// Converts a decimal string to a long using simple_strtol() with base 10.
function strtol:long(str:string)
%{
	const char *digits = THIS->str;

	THIS->__retvalue = simple_strtol(digits, NULL, 10);
%}


// Returns the argument string of the current task, with the arguments
// separated by the first character of delimit.
function current_args:string(delimit:string)
{
	cur = task_current()
	return __task_args(cur, delimit)
}


// __task_args()
// Returns the argument area of a task as one string.
//   task    : task_struct address; must be the current task, otherwise an
//             empty string is returned
//   delimit : its first character replaces the NUL bytes that separate the
//             individual arguments
// At most MAXSTRINGLEN bytes are read (including the terminator), so long
// argument lists are truncated.
function __task_args:string(task:long, delimit:string)
%{
	struct task_struct *task = (struct task_struct *) (long) THIS->task;
	struct mm_struct *mm;
	/* Copy straight into the return buffer instead of keeping a second
	   MAXSTRINGLEN-sized buffer on the kernel stack. */
	char *out = THIS->__retvalue;
	long len, i;

	out[0] = 0;

	// This should only be called for the "current" task. Restriction may
	// be lifted later.
	if (!task || (task != current))
		goto end;

	mm = (struct mm_struct *) deref (sizeof(struct mm_struct *),
	                                 &(task->mm));
	if (!mm)
		goto end;

	len = (long) (mm->arg_end - mm->arg_start);
	if (len > MAXSTRINGLEN)
		len = MAXSTRINGLEN;
	/* Nothing to copy; also keeps out[len - 1] below inside the buffer. */
	if (len <= 0)
		goto end;

	/* The copy returns the number of bytes that could NOT be copied;
	   keep only the part that was actually read. */
	len -= (long) _stp_copy_from_user (out, (char *)mm->arg_start, len);
	if (len <= 0) {
		out[0] = 0;
		goto end;
	}

	/* Replace the NUL separators with the caller specified separator */
	for (i = 0; i < len - 1; i++)
		if (out[i] == 0)
			out[i] = THIS->delimit[0];
	/* Make sure the string is NUL terminated */
	out[len - 1] = 0;

end:
	if (0) {
deref_fault:
		CONTEXT->last_error = "pointer dereference fault";
	}
%}


// args_str()
// Concatenates the argv arguments, separated by a delimiter,
// into one string, up to a maximum length of MAXSTRINGLEN.
// Every appended argument is followed by the delimiter, so a non-empty
// result ends with the delimiter unless it was truncated.
function args_str:string(args    :long,     # ptr to argv array
                         delimit :string,   # Delimiter to use between
                                            # arguments in returned string,
                                            # useful for later tokenizing
                                            # of the arguments string
                         execname:long)     # Indicates whether to return the
                                            # execname (1st arg) in the
                                            # returned string
%{
	char **argv, *arg;
	int first = 1;

	argv = (char **) deref (sizeof(char **), &(THIS->args));
	if (argv == NULL) goto end;
	arg = (char *) deref (sizeof (char *), &(*argv));

	while (arg) {
		/* Skip only the first argument, and only when the caller
		   did not ask for the execname. */
		if (!first || THIS->execname) {
			strlcat (THIS->__retvalue, arg, MAXSTRINGLEN);
			strlcat (THIS->__retvalue, THIS->delimit, MAXSTRINGLEN);
			/* Stop walking argv once the result buffer is full;
			   further appends could not change it. */
			if (strlen (THIS->__retvalue) >= MAXSTRINGLEN - 1)
				break;
		}
		/* Must be cleared on every pass: clearing it only inside the
		   branch above left it set forever when execname was 0, so
		   no argument was ever appended. */
		first = 0;
		arg = (char *) deref (sizeof (char *), &(*++argv));
	}

	if (0) {
deref_fault:
		CONTEXT->last_error = "pointer dereference fault";
	}
end: ;
%}


// args_exec()
// Returns argv[0] (the exec name or path). The result is left empty when
// the argv pointer or its first entry is NULL.
function args_exec:string (args:long) # ptr to argv array
%{
	char **vec;
	char *first_arg;

	vec = (char **) deref (sizeof(char **), &(THIS->args));
	if (vec != NULL) {
		first_arg = (char *) deref (sizeof (char *), &(*vec));
		if (first_arg != NULL)
			strlcpy (THIS->__retvalue, first_arg, MAXSTRINGLEN);
	}

	if (0) {
deref_fault:
		CONTEXT->last_error = "pointer dereference fault";
	}
%}


// cmd_from_path()
// Returns the part of path after its last slash, or the whole path when it
// contains no slash.
function cmd_from_path:string (path:string)
%{
	/* The separator is held in a string because a character literal
	   cannot be written inside the single-quoted shell script. */
	char sep[2] = "/";
	char *last_slash = strrchr(THIS->path, sep[0]);
	char *base = THIS->path;

	if (last_slash != NULL)
		base = last_slash + 1;
	strlcpy(THIS->__retvalue, base, MAXSTRINGLEN);
%}


// Returns the task_struct address for a pid, or 0 when the lookup done by
// __pid2task() finds no task.
function pid_task:long (pid:long)
%{
	struct task_struct *found = __pid2task((int) THIS->pid);

	THIS->__retvalue = (long) found;
%}


//
// pid_cputime()
// Looks up the task for a pid and returns its stime + utime converted to
// milliseconds. Returns -1 when no task is found for the pid.
//
function pid_cputime:long(pid:long)
%{
	struct task_struct *tsk;

	THIS->__retvalue = -1;

	tsk = __pid2task((int) THIS->pid);
	tsk = (struct task_struct *)
	     deref (sizeof(struct task_struct *), &(tsk));

	if (tsk != NULL) {
		/* Read both counters while holding the task lock. */
		task_lock(tsk);
		THIS->__retvalue = (long) cputime_to_msecs(tsk->stime + tsk->utime);
		task_unlock(tsk);
	}

	if (0) {
deref_fault:
		CONTEXT->last_error = "pointer dereference fault";
	}
end: ;
%}

//
// task_cputime()
// Returns stime + utime of the given task descriptor converted to
// milliseconds, or -1 when the task address is 0.
//
function task_cputime:long(task:long)
%{
	struct task_struct *tsk;

	tsk = (struct task_struct *)
	      deref (sizeof(struct task_struct *), &(THIS->task));

	THIS->__retvalue = -1;

	if (tsk != NULL) {
		/* Read both counters while holding the task lock. */
		task_lock(tsk);
		THIS->__retvalue = (long) cputime_to_msecs(tsk->stime + tsk->utime);
		task_unlock(tsk);
	}

	if (0) {
deref_fault:
		CONTEXT->last_error = "pointer dereference fault";
	}
end: ;
%}

//
// is_kthread()
// Determines whether this is a process descriptor for a
// kernel thread (has no user address space).
// Returns 1 when the task has no mm, otherwise 0. A task address of 0
// also yields 0 instead of being passed to task_lock().
//
function is_kthread:long (task:long)
%{
	struct task_struct *task;
	THIS->__retvalue = 0;

	task = (struct task_struct *) 
	            deref (sizeof(struct task_struct *), &(THIS->task));
	/* Same NULL guard as pid_cputime() and task_cputime(). */
	if (task) {
		task_lock(task);
		if (task->mm == NULL)
			THIS->__retvalue = 1;
		task_unlock(task);
	}

	if (0) {
deref_fault:
		CONTEXT->last_error = "pointer dereference fault";
	}
end: ;
%}


global uniq_args_cputime, uniq_args_cnt
global old, new
global initial_pids, current_pids, done_pids
global execpath, execname, cputime, user, args
global f_args, f_names, f_pids, f_uids
global arg_cputime, arg_cputime_cnt

// Startup: load the filter tables from the option strings supplied by the
// shell wrapper, then record every user process that already exists.
probe begin(-1000)
{
	old = 0
	new = 1

	# The filter strings are spliced in by the shell. Each expansion is
	# double-quoted on the shell side so that a value containing
	# whitespace cannot split the script argument given to stap.
	# NOTE: values containing a double quote or a backslash still end
	# up unescaped inside the string literal.
	f_arg_str  = "'"$F_ARGSTR"'"
	f_name_str = "'"$F_NAMESTR"'"
	f_pid_str  = "'"$F_PIDSTR"'"
	f_uid_str  = "'"$F_UIDSTR"'"

	delim = "'"$DELIM"'"

	# Argument strings
	if ('$F_ARGS') {
		tok = tokenize(f_arg_str, delim)
		while (tok != "") {
			f_args[tok] = 1
			tok = tokenize("", delim)
		}
	}

	# Process names
	if ('$F_NAME') {
		tok = tokenize(f_name_str, delim)
		while (tok != "") {
			f_names[tok] = 1
			tok = tokenize("", delim)
		}
	}

	# Process IDs
	if ('$F_PID') {
		tok = tokenize(f_pid_str, delim)
		while (tok != "") {
			f_pids[strtol(tok)] = 1
			tok = tokenize("", delim)
		}
	}

	# User IDs
	if ('$F_UID') {
		tok = tokenize(f_uid_str, delim)
		while (tok != "") {
			f_uids[strtol(tok)] = 1
			tok = tokenize("", delim)
		}
	}

	// Grab the existing pids by walking the task list from init_task
	// until the walk returns to it.
	itask = inittask()
	rcu_read_lock()
	for (task = next_task(itask); task != itask; task = next_task(task)) {
		// Ignore kernel threads
		if (is_kthread(task)) continue
		p = task_pid(task)
		if (p > 0) {
			// NOTE: process arguments are kept in user address space.
			// All probes, including begin, run with irqs disabled and 
			// cannot sleep, so we cannot access user space for non-current
			// processes at this point. Thus, we wait until a process is
			// "current" in scheduler.cpu_off before grabbing the arguments.
			initial_pids[p] = 1
			user[p] = task_uid(task)
			// Baseline for the first interval
			cputime[p, old] = task_cputime(task)
		}
	}
	rcu_read_unlock()
}


// For a process that existed before the script started, the arguments can
// only be read while that process is "current", so they are picked up here
// the first time the process is seen in this probe.
probe scheduler.cpu_off
{
	p = pid()
	if (!([p] in initial_pids)) next

	delete initial_pids[p]

	args[p] = current_args(" ")
	// The first space-separated token is the executable path
	execpath[p] = tokenize(args[p], " ")
	execname[p] = cmd_from_path(execpath[p])

	// Track the process only if it passes the filters
	if (match_filters(p))
		current_pids[p] = 1
	else
		delete_pid(p)
}


// On exec, record the new argument string, executable path, command name
// and uid for this pid, then decide whether to track it.
probe process.exec
{
	p = pid()

	user[p] = uid()
	args[p] = args_str($argv, " ", 1)
	execpath[p] = args_exec($argv)
	execname[p] = cmd_from_path(execpath[p])

	if (match_filters(p))
		current_pids[p] = 1
	else
		delete_pid(p)
}


// On exit of a tracked process, take a final cpu time reading and queue
// the pid for cleanup by the timer probe.
probe process.exit
{
	p = pid()

	if ([p] in current_pids) {
		// Final reading for the exiting process
		cputime[p, new] = pid_cputime(p)

		// Removed from the tables after the next report
		done_pids[p] = 1
	}
}

// Drops every per-pid table entry kept for p.
function delete_pid (p:long)
{
	// cpu time samples for both interval slots
	delete cputime[p, old]
	delete cputime[p, new]

	// descriptive data
	delete args[p]
	delete user[p]
	delete execpath[p]
	delete execname[p]

	// tracking set
	delete current_pids[p]
}

// Returns 1 when process p passes every filter given on the command line
// (or when no filter was given), otherwise 0. The name, pid and uid filters
// must each match; of the argument strings, at least one must occur in the
// argument string of the process.
function match_filters:long (p:long)
{
	# No filters at all: everything matches
	if (!'$FILTER') return 1

	if ('$F_NAME' && !([execname[p]] in f_names)) return 0
	if ('$F_PID'  && !([p]           in f_pids))  return 0
	if ('$F_UID'  && !([user[p]]     in f_uids))  return 0

	if ('$F_ARGS') {
		hits = 0
		foreach ([wanted] in f_args)
			if (isinstr(args[p], wanted)) hits++
		if (hits == 0) return 0
	}

	return 1
}

// Periodic report: sample the cpu time of every tracked process, add the
// per-interval deltas up by process name and argument segment (or the whole
// argument string), print the totals in descending order of cpu time, then
// drop exited processes and swap the old/new sample slots.
probe timer.s('$P_INTERVAL')
{
	foreach ([p] in current_pids) {
		if (!([p] in done_pids)) {
			sample = pid_cputime(p)
			if (sample < 0) {
				// pid_cputime() returns -1 when the task can no
				// longer be found. Storing that as a cpu time
				// would give a negative delta, so count nothing
				// for this interval and clean the pid up below.
				cputime[p, new] = cputime[p, old]
				done_pids[p] = 1
			} else {
				cputime[p, new] = sample
			}
		}

		if ('$F_ARGS') {
			// Argument segments were specified. Group by process name
			// and argument segment.
			foreach ([arg] in f_args)
				if (isinstr(args[p], arg)) {
					arg_cputime[execname[p], arg] += cputime[p, new] - cputime[p, old]
					arg_cputime_cnt[execname[p], arg]++
				}
		} else {
			// Argument segments were NOT specified. Group by process
			// name and entire argument string. Note the argument string
			// can be quite long in some cases.
			arg_cputime[execname[p], args[p]] += cputime[p, new] - cputime[p, old]
			arg_cputime_cnt[execname[p], args[p]]++
		}
	}

	// Shift the timestamp by the offset computed by the shell wrapper
	time = gettimeofday_s() + '$TZ_ADJUST'
	printf("======================= %s ========================\n", ctime(time))
	printf("%-8s %-6s %-16s %-s\n", "TIME(ms)", "#PROC", "PROCNAME", "ARG SEGMENT")
	foreach ([name, arg] in arg_cputime-) 
		printf ("%-8d %-6d %-16s %-s\n", arg_cputime[name, arg],
		        arg_cputime_cnt[name, arg], name, arg)

	printf("\n")

	delete arg_cputime
	delete arg_cputime_cnt

	// Clean up processes that have exited
	foreach ([p] in done_pids) delete_pid(p)
	delete done_pids

	// flip indexes, so this sample becomes the baseline of the next interval
	tmp = new
	new = old
	old = tmp
}
'

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]