#! /usr/bin/env stap

# Samples hardware performance counters around each call to visit() in
# /usr/bin/find and, at session end, prints log2 histograms of the
# per-call ratios
#   cycles / instructions
#   branches / instructions
#   cache references / instructions
#
# All arithmetic is integer arithmetic, so a ratio below 1 truncates to 0
# and is dropped by the "q > 0" checks in the return probe.

# Per-call ratios, aggregated for the histograms printed by "probe end".
global cycles_per_insn
global branch_per_insn
global cacheref_per_insn

# Raw counter readings taken at visit() entry, aggregated as statistics.
# The script never reads these explicitly.
# NOTE(review): stap may display written-but-unread globals automatically
# at exit -- confirm before removing any of them.
global insns
global cycles
global branches
global cacherefs

# Counter values captured at visit() entry, keyed by counter name, so the
# return probe can compute the delta across the call.
global insn

# Declare one named counter per hardware event for /usr/bin/find.  The
# empty handlers are intentional: the counters are only read via @perf().
probe perf.hw.instructions.process("/usr/bin/find").counter("find_insns") {}
probe perf.hw.cpu_cycles.process("/usr/bin/find").counter("find_cycles") {}
probe perf.hw.branch_instructions.process("/usr/bin/find").counter("find_branches") {}
probe perf.hw.cache_references.process("/usr/bin/find").counter("find_cache_refs") {}

# Entry to visit(): snapshot every counter.
probe process("/usr/bin/find").function("visit") # generally .inline'd, thus the need for debuginfo
{
  insn["find_insns"] = @perf("find_insns")
  insns <<< (insn["find_insns"])

  insn["find_cycles"] = @perf("find_cycles")
  cycles <<< insn["find_cycles"]

  insn["find_branches"] = @perf("find_branches")
  branches <<< insn["find_branches"]

  insn["find_cache_refs"] = @perf("find_cache_refs")
  cacherefs <<< insn["find_cache_refs"]
}

# Exit from visit(): turn the counter deltas into per-instruction ratios.
# The "!" marks the .return probe point as sufficient when it resolves;
# otherwise the statement probe after the comma is used instead.
probe process("/usr/bin/find").function("visit").return !,
      process("/usr/bin/find").statement("visit@ftsfind.c+13").nearest # in lieu of .inline.return
{
  # Instructions retired during this call; shared divisor for all ratios.
  divisor = (@perf("find_insns") - insn["find_insns"])

  dividend = (@perf("find_cycles") - insn["find_cycles"])
  q = (divisor > 0 ? dividend / divisor : 0)  # guard against division by zero
  if (q > 0)
    cycles_per_insn <<< q

  dividend = (@perf("find_branches") - insn["find_branches"])
  q = (divisor > 0 ? dividend / divisor : 0)
  if (q > 0)
    branch_per_insn <<< q

  # Use the cache-reference counter here.  This stanza previously reused
  # the cycle counter, which made the "Cache Refs per Insn" histogram a
  # duplicate of "Cycles per Insn".
  dividend = (@perf("find_cache_refs") - insn["find_cache_refs"])
  q = (divisor > 0 ? dividend / divisor : 0)
  if (q > 0)
    cacheref_per_insn <<< q
}

# Session end: print a histogram for each ratio that collected samples.
probe end
{
  if (@count(cycles_per_insn)) {
    printf ("Cycles per Insn\n\n")
    print (@hist_log(cycles_per_insn))
  }
  if (@count(branch_per_insn)) {
    printf ("\nBranches per Insn\n\n")
    print (@hist_log(branch_per_insn))
  }
  if (@count(cacheref_per_insn)) {
    printf ("Cache Refs per Insn\n\n")
    print (@hist_log(cacheref_per_insn))
  }
}