This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Script to look at postgresql lock contention
- From: William Cohen <wcohen at redhat dot com>
- To: SystemTAP <systemtap at sources dot redhat dot com>
- Date: Tue, 03 Mar 2009 21:46:02 -0500
- Subject: Script to look at postgresql lock contention
Hi All,
I have been playing around with a locally built version of postsql with the
systemtap markers available. Today I was working on a demo to show how
sysetemtap could be used to look at postgres lock contention. The tracks the
time that a process is waiting for a lock and the number of times that it fails
to grab a lock. The script prints out the top 5 lock/pid contentions for regular
locks and lightweight locks. It also prints out the top 5 loc/pid combinations
that failed to obtain a lock when trying. The output looks like:
locks
lock pid contend(us) count avg
0x4ec 26978 882 1 882
0x4e1 27006 14976 1 14976
light weight locks
lock pid contend(us) count avg
0x8 27011 198222 130 1524
0x8 27029 171348 130 1318
0x8 26971 244168 98 2491
0x8 26969 194154 87 2231
0x8 27017 126004 65 1938
fail lock tries
lock pid count
0x7 26811 12
0x7 27019 11
0x7 26819 7
0x7 26809 7
0x7 27017 7
Any feedback on this example code would be appreciated.
-Will
global lock, lwlock, tlock, tlwlock, flock
probe process("/usr/bin/postgres").mark("lock__startwait")
{
lock[$arg1] = gettimeofday_us()
}
probe process("/usr/bin/postgres").mark("lock__endwait")
{
t = gettimeofday_us()
if ( $arg1 in lock) {
tlock[$arg1, pid()] <<< t - lock[$arg1]
delete lock[$arg1]
}
}
probe process("/usr/bin/postgres").mark("lwlock__startwait")
{
lwlock[$arg1] = gettimeofday_us()
}
probe process("/usr/bin/postgres").mark("lwlock__endwait")
{
t = gettimeofday_us()
if ( $arg1 in lwlock) {
tlwlock[$arg1,pid()] <<< t - lwlock[$arg1]
delete lwlock[$arg1]
}
}
probe process("/usr/bin/postgres").mark("lwlock__condacquire__fail")
{
flock[$arg1,pid()] <<< 1
}
function print_data ()
{
printf("\nlocks\n")
printf("%8s %5s %10s %10s %10s\n",
"lock", "pid", "contend(us)", "count", "avg")
foreach ([q,p] in tlock- limit 5)
printf("%#8x %5d %10d %10d %10d\n", q, p, @sum(tlock[q,p]),
@count(tlock[q,p]), @avg(tlock[q,p]))
printf("\n\nlight weight locks\n")
printf("%8s %5s %10s %10s %10s\n",
"lock", "pid", "contend(us)", "count", "avg")
foreach ([q,p] in tlwlock- limit 5)
printf("%#8x %5d %10d %10d %10d\n", q, p, @sum(tlwlock[q,p]),
@count(tlwlock[q,p]), @avg(tlwlock[q,p]))
printf("\n\nfail lock tries\n")
printf("%8s %5s %10s\n", "lock", "pid", "count")
foreach ([q,p] in flock- limit 5)
printf("%#8x %5d %10d\n", q, p, @sum(flock[q,p]))
}
probe end { print_data() }