This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: Some newbie questions
On 08/28/2016 08:29 AM, Avi Kivity wrote:
>
>
> On 08/26/2016 11:07 PM, David Smith wrote:
>> On 08/26/2016 01:39 PM, Avi Kivity wrote:
>>> On 08/25/2016 08:37 PM, David Smith wrote:
>>>> On 08/25/2016 11:21 AM, Avi Kivity wrote:
>>>>> Hi,
>>>>>
>>>>> Should I wait for ongoing improvement here, or shall I look elsewhere
>>>>> for my tracing needs?
>>>>>
>>>>> It would be a pity (for me) if I have to find another solution,
>>>>> because
>>>>> systemtap has all the expressiveness and integration I need. But
>>>>> it has
>>>>> a dramatic impact on my application runtime.
>>>>>
>>>>> I was able to extract some useful data with perf probe/perf record,
>>>>> but
>>>>> as soon as I need to qualify a probe point with runtime information,
>>>>> perf falls short.
>>>> As Frank mentioned in a previous email, it might be possible for us to
>>>> switch to using straight kprobes instead of syscall tracing to handle
>>>> mmap tracing. In your use case of calling epoll_wait() lots of times
>>>> per
>>>> second, that might be a *big* win.
>>>>
>>>> I'll see what can be done to add that feature.
>>>>
>>> Thanks a lot. I'll be happy to test patches.
>> OK, since you asked...
>>
>> Here's a patch I'm testing that tries to do prefiltering when a syscall
>> occurs, so we don't have to take that lock.
>>
>> Please rebuild with it, and let me know if it (a) works, and (b) has
>> lower overhead in your situation.
>>
>
> With an unloaded system, systemtap almost vanishes from the profile.
> This is on a 2s24c48t system, running epoll_pwait() and polling on user
> memory locations in a tight loop.
... stuff deleted ...
After looking into it, I found that Josh was right in thinking I had the
test backwards. Can you try the following patch (after reverting the last
patch) and let us know what you find?
--
David Smith
dsmith@redhat.com
Red Hat
http://www.redhat.com
256.217.0141 (direct)
256.837.0057 (fax)
diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c
index bb2d663..d2d5b70 100644
--- a/runtime/stp_utrace.c
+++ b/runtime/stp_utrace.c
@@ -26,6 +26,7 @@
#include <trace/events/syscalls.h>
#include "stp_task_work.c"
#include "linux/stp_tracepoint.h"
+#include "syscall.h"
#include "stp_helper_lock.h"
@@ -116,6 +117,8 @@ static void utrace_report_exec(void *cb_data __attribute__ ((unused)),
#define __UTRACE_REGISTERED 1
static atomic_t utrace_state = ATOMIC_INIT(__UTRACE_UNREGISTERED);
+static int __stp_utrace_syscall_tracing = 0;
+
// If wake_up_state() is exported, use it.
#if defined(STAPCONF_WAKE_UP_STATE_EXPORTED)
#define stp_wake_up_state wake_up_state
@@ -2064,6 +2067,23 @@ static void utrace_report_syscall_entry(void *cb_data __attribute__ ((unused)),
if (atomic_read(&utrace_state) != __UTRACE_REGISTERED)
return;
+
+ /* If we're only doing syscall tracing for VMA tracking, then
+ * let's prefilter the syscall numbers that we need before
+ * calling the handlers.
+ *
+ * This allows us to avoid finding the utrace struct, which
+ * avoids some locking. */
+ if (!__stp_utrace_syscall_tracing) {
+ long syscall_no = _stp_syscall_get_nr(task, regs);
+ if (syscall_no != MMAP_SYSCALL_NO(task)
+ && syscall_no != MMAP2_SYSCALL_NO(task)
+ && syscall_no != MPROTECT_SYSCALL_NO(task)
+ && syscall_no != MUNMAP_SYSCALL_NO(task))
+ return;
+ }
+
+ /* Grab the utrace struct for this task. */
utrace = task_utrace_struct(task);
/* FIXME: Is this 100% correct? */
@@ -2103,6 +2123,23 @@ static void utrace_report_syscall_exit(void *cb_data __attribute__ ((unused)),
if (atomic_read(&utrace_state) != __UTRACE_REGISTERED)
return;
+
+ /* If we're only doing syscall tracing for VMA tracking, then
+ * let's prefilter the syscall numbers that we need before
+ * calling the handlers.
+ *
+ * This allows us to avoid finding the utrace struct, which
+ * avoids some locking. */
+ if (!__stp_utrace_syscall_tracing) {
+ long syscall_no = _stp_syscall_get_nr(task, regs);
+ if (syscall_no != MMAP_SYSCALL_NO(task)
+ && syscall_no != MMAP2_SYSCALL_NO(task)
+ && syscall_no != MPROTECT_SYSCALL_NO(task)
+ && syscall_no != MUNMAP_SYSCALL_NO(task))
+ return;
+ }
+
+ /* Grab the utrace struct for this task. */
utrace = task_utrace_struct(task);
/* FIXME: Is this 100% correct? */
@@ -2469,4 +2506,9 @@ static void utrace_report_work(struct task_work *work)
stp_task_work_func_done();
}
+/* If this is called, we're doing utrace-based syscall tracing. */
+static void stap_utrace_syscall_tracing(void)
+{
+ __stp_utrace_syscall_tracing = 1;
+}
#endif /* _STP_UTRACE_C */
diff --git a/runtime/stp_utrace.h b/runtime/stp_utrace.h
index 9f162bb..34968b1 100644
--- a/runtime/stp_utrace.h
+++ b/runtime/stp_utrace.h
@@ -329,4 +329,7 @@ static inline enum utrace_resume_action utrace_resume_action(u32 action)
return action & UTRACE_RESUME_MASK;
}
+/* If this is called, we're doing utrace-based syscall tracing. */
+static void stap_utrace_syscall_tracing(void);
+
#endif /* _STP_UTRACE_H */
diff --git a/tapset-utrace.cxx b/tapset-utrace.cxx
index 8580800..154c335 100644
--- a/tapset-utrace.cxx
+++ b/tapset-utrace.cxx
@@ -1208,6 +1208,12 @@ utrace_derived_probe_group::emit_module_linux_init (systemtap_session& s)
return;
s.op->newline() << "/* ---- utrace probes ---- */";
+ if (flags_seen[UDPF_SYSCALL] || flags_seen[UDPF_SYSCALL_RETURN])
+ {
+ s.op->newline() << "#if !defined(CONFIG_UTRACE)";
+ s.op->newline() << "stap_utrace_syscall_tracing();";
+ s.op->newline() << "#endif";
+ }
s.op->newline() << "for (i=0; i<ARRAY_SIZE(stap_utrace_probes); i++) {";
s.op->newline(1) << "struct stap_utrace_probe *p = &stap_utrace_probes[i];";
s.op->newline() << "probe_point = p->probe->pp;"; // for error messages