This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[blktrace kernel patch] Separate out non-blktrace-specific code


This patch moves the non-block-specific code out of the core blktrace
kernel code and into separate utt.* files, so that anyone can use it
for generic tracing.  It also adds a config option,
CONFIG_UTT, and hooks up the remaining code in blktrace.c to use it.
The accompanying userspace patch does the same thing for the userspace
tools.

The only real problem I had was that sched_clock() isn't exported;
you'll have to export it in order to use this with the systemtap
integration patch (this patch already adds that hack for x86, though).

Tom


diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index b8fa0a8..09b37c1 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -124,6 +124,8 @@ unsigned long long sched_clock(void)
 	/* return the value in ns */
 	return cycles_2_ns(this_offset);
 }
+// utt hack for now
+EXPORT_SYMBOL_GPL(sched_clock);
 
 static unsigned long calculate_cpu_khz(void)
 {
diff --git a/block/Kconfig b/block/Kconfig
index b6f5f0a..4d7f041 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -14,8 +14,7 @@ config LBD
 config BLK_DEV_IO_TRACE
 	bool "Support for tracing block io actions"
 	depends on SYSFS
-	select RELAY
-	select DEBUG_FS
+	select UTT
 	help
 	  Say Y here, if you want to be able to trace the block layer actions
 	  on a given queue. Tracing allows you to see any traffic happening
@@ -24,6 +23,19 @@ config BLK_DEV_IO_TRACE
 
 	  git://brick.kernel.dk/data/git/blktrace.git
 
+config UTT
+	bool "Unified Tracing Transport"
+	select RELAY
+	select DEBUG_FS
+	help
+	  This option enables support for the tracing transport
+	  used by tracing tools such as blktrace, LTT and systemtap.
+	  The UTT can also be used as a tracing transport for one-off
+	  tools by making use of a matching set of generic userspace
+	  tools which can be found at some repository.
+
+	  If unsure, say N.
+
 config LSF
 	bool "Support for Large Single Files"
 	depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML
diff --git a/block/Makefile b/block/Makefile
index c05de0e..78ccacd 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadli
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
+obj-$(CONFIG_UTT)		+= utt.o
diff --git a/block/blktrace.c b/block/blktrace.c
index ed8fc4c..8ae85cb 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -22,9 +22,9 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
+#include <linux/utt.h>
 #include <asm/uaccess.h>
 
-static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
 static unsigned int blktrace_seq __read_mostly = 1;
 
 /*
@@ -35,7 +35,7 @@ static void trace_note_tsk(struct blk_tr
 {
 	struct blk_io_trace *t;
 
-	t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm));
+	t = relay_reserve(bt->utt->rchan, sizeof(*t) + sizeof(tsk->comm));
 	if (t) {
 		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
 		t->device = bt->dev;
@@ -96,7 +96,7 @@ void __blk_add_trace(struct blk_trace *b
 	pid_t pid;
 	int cpu;
 
-	if (unlikely(bt->trace_state != Blktrace_running))
+	if (unlikely(bt->utt->trace_state != Utt_trace_running))
 		return;
 
 	what |= ddir_act[rw & WRITE];
@@ -121,14 +121,14 @@ void __blk_add_trace(struct blk_trace *b
 	if (unlikely(tsk->btrace_seq != blktrace_seq))
 		trace_note_tsk(bt, tsk);
 
-	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
+	t = relay_reserve(bt->utt->rchan, sizeof(*t) + pdu_len);
 	if (t) {
 		cpu = smp_processor_id();
-		sequence = per_cpu_ptr(bt->sequence, cpu);
+		sequence = per_cpu_ptr(bt->utt->sequence, cpu);
 
 		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
 		t->sequence = ++(*sequence);
-		t->time = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu);
+		t->time = sched_clock() - per_cpu(utt_trace_cpu_offset, cpu);
 		t->sector = sector;
 		t->bytes = bytes;
 		t->action = what;
@@ -147,59 +147,6 @@ void __blk_add_trace(struct blk_trace *b
 
 EXPORT_SYMBOL_GPL(__blk_add_trace);
 
-static struct dentry *blk_tree_root;
-static struct mutex blk_tree_mutex;
-static unsigned int root_users;
-
-static inline void blk_remove_root(void)
-{
-	if (blk_tree_root && simple_empty(blk_tree_root)) {
-		debugfs_remove(blk_tree_root);
-		blk_tree_root = NULL;
-	}
-}
-
-static void blk_remove_tree(struct dentry *dir)
-{
-	mutex_lock(&blk_tree_mutex);
-	debugfs_remove(dir);
-	if (--root_users == 0)
-		blk_remove_root();
-	mutex_unlock(&blk_tree_mutex);
-}
-
-static struct dentry *blk_create_tree(const char *blk_name)
-{
-	struct dentry *dir = NULL;
-
-	mutex_lock(&blk_tree_mutex);
-
-	if (!blk_tree_root) {
-		blk_tree_root = debugfs_create_dir("block", NULL);
-		if (!blk_tree_root)
-			goto err;
-	}
-
-	dir = debugfs_create_dir(blk_name, blk_tree_root);
-	if (dir)
-		root_users++;
-	else
-		blk_remove_root();
-
-err:
-	mutex_unlock(&blk_tree_mutex);
-	return dir;
-}
-
-static void blk_trace_cleanup(struct blk_trace *bt)
-{
-	relay_close(bt->rchan);
-	debugfs_remove(bt->dropped_file);
-	blk_remove_tree(bt->dir);
-	free_percpu(bt->sequence);
-	kfree(bt);
-}
-
 static int blk_trace_remove(request_queue_t *q)
 {
 	struct blk_trace *bt;
@@ -208,76 +155,9 @@ static int blk_trace_remove(request_queu
 	if (!bt)
 		return -EINVAL;
 
-	if (bt->trace_state == Blktrace_setup ||
-	    bt->trace_state == Blktrace_stopped)
-		blk_trace_cleanup(bt);
-
-	return 0;
-}
-
-static int blk_dropped_open(struct inode *inode, struct file *filp)
-{
-	filp->private_data = inode->i_private;
-
-	return 0;
-}
-
-static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
-				size_t count, loff_t *ppos)
-{
-	struct blk_trace *bt = filp->private_data;
-	char buf[16];
-
-	snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
-
-	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-static struct file_operations blk_dropped_fops = {
-	.owner =	THIS_MODULE,
-	.open =		blk_dropped_open,
-	.read =		blk_dropped_read,
-};
-
-/*
- * Keep track of how many times we encountered a full subbuffer, to aid
- * the user space app in telling how many lost events there were.
- */
-static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
-				     void *prev_subbuf, size_t prev_padding)
-{
-	struct blk_trace *bt;
-
-	if (!relay_buf_full(buf))
-		return 1;
-
-	bt = buf->chan->private_data;
-	atomic_inc(&bt->dropped);
-	return 0;
-}
-
-static int blk_remove_buf_file_callback(struct dentry *dentry)
-{
-	debugfs_remove(dentry);
-	return 0;
-}
-
-static struct dentry *blk_create_buf_file_callback(const char *filename,
-						   struct dentry *parent,
-						   int mode,
-						   struct rchan_buf *buf,
-						   int *is_global)
-{
-	return debugfs_create_file(filename, mode, parent, buf,
-					&relay_file_operations);
+	return utt_trace_remove(bt->utt);
 }
 
-static struct rchan_callbacks blk_relay_callbacks = {
-	.subbuf_start		= blk_subbuf_start_callback,
-	.create_buf_file	= blk_create_buf_file_callback,
-	.remove_buf_file	= blk_remove_buf_file_callback,
-};
-
 /*
  * Setup everything required to start tracing
  */
@@ -286,25 +166,22 @@ static int blk_trace_setup(request_queue
 {
 	struct blk_user_trace_setup buts;
 	struct blk_trace *old_bt, *bt = NULL;
-	struct dentry *dir = NULL;
 	char b[BDEVNAME_SIZE];
 	int ret, i;
 
 	if (copy_from_user(&buts, arg, sizeof(buts)))
 		return -EFAULT;
 
-	if (!buts.buf_size || !buts.buf_nr)
-		return -EINVAL;
-
-	strcpy(buts.name, bdevname(bdev, b));
+	strcpy(buts.utts.root, "block");
+	strcpy(buts.utts.name, bdevname(bdev, b));
 
 	/*
 	 * some device names have larger paths - convert the slashes
 	 * to underscores for this to work as expected
 	 */
-	for (i = 0; i < strlen(buts.name); i++)
-		if (buts.name[i] == '/')
-			buts.name[i] = '_';
+	for (i = 0; i < strlen(buts.utts.name); i++)
+		if (buts.utts.name[i] == '/')
+			buts.utts.name[i] = '_';
 
 	if (copy_to_user(arg, &buts, sizeof(buts)))
 		return -EFAULT;
@@ -314,28 +191,13 @@ static int blk_trace_setup(request_queue
 	if (!bt)
 		goto err;
 
-	bt->sequence = alloc_percpu(unsigned long);
-	if (!bt->sequence)
+	bt->utt = utt_trace_setup(&buts.utts);
+	if (!bt->utt) {
+		ret = buts.utts.err;
 		goto err;
-
-	ret = -ENOENT;
-	dir = blk_create_tree(buts.name);
-	if (!dir)
-		goto err;
-
-	bt->dir = dir;
+	}
+		
 	bt->dev = bdev->bd_dev;
-	atomic_set(&bt->dropped, 0);
-
-	ret = -EIO;
-	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
-	if (!bt->dropped_file)
-		goto err;
-
-	bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks);
-	if (!bt->rchan)
-		goto err;
-	bt->rchan->private_data = bt;
 
 	bt->act_mask = buts.act_mask;
 	if (!bt->act_mask)
@@ -347,7 +209,6 @@ static int blk_trace_setup(request_queue
 		bt->end_lba = -1ULL;
 
 	bt->pid = buts.pid;
-	bt->trace_state = Blktrace_setup;
 
 	ret = -EBUSY;
 	old_bt = xchg(&q->blk_trace, bt);
@@ -359,49 +220,21 @@ static int blk_trace_setup(request_queue
 	return 0;
 err:
 	if (bt) {
-		if (bt->dropped_file)
-			debugfs_remove(bt->dropped_file);
-		if (bt->sequence)
-			free_percpu(bt->sequence);
-		if (bt->rchan)
-			relay_close(bt->rchan);
+		if (bt->utt)
+			utt_trace_cleanup(bt->utt);
 		kfree(bt);
 	}
-	if (dir)
-		blk_remove_tree(dir);
 	return ret;
 }
 
 static int blk_trace_startstop(request_queue_t *q, int start)
 {
 	struct blk_trace *bt;
-	int ret;
 
 	if ((bt = q->blk_trace) == NULL)
 		return -EINVAL;
 
-	/*
-	 * For starting a trace, we can transition from a setup or stopped
-	 * trace. For stopping a trace, the state must be running
-	 */
-	ret = -EINVAL;
-	if (start) {
-		if (bt->trace_state == Blktrace_setup ||
-		    bt->trace_state == Blktrace_stopped) {
-			blktrace_seq++;
-			smp_mb();
-			bt->trace_state = Blktrace_running;
-			ret = 0;
-		}
-	} else {
-		if (bt->trace_state == Blktrace_running) {
-			bt->trace_state = Blktrace_stopped;
-			relay_flush(bt->rchan);
-			ret = 0;
-		}
-	}
-
-	return ret;
+	return utt_trace_startstop(bt->utt, start, &blktrace_seq);
 }
 
 /**
@@ -454,85 +287,8 @@ void blk_trace_shutdown(request_queue_t 
 	blk_trace_remove(q);
 }
 
-/*
- * Average offset over two calls to sched_clock() with a gettimeofday()
- * in the middle
- */
-static void blk_check_time(unsigned long long *t)
-{
-	unsigned long long a, b;
-	struct timeval tv;
-
-	a = sched_clock();
-	do_gettimeofday(&tv);
-	b = sched_clock();
-
-	*t = tv.tv_sec * 1000000000 + tv.tv_usec * 1000;
-	*t -= (a + b) / 2;
-}
-
-static void blk_trace_check_cpu_time(void *data)
-{
-	unsigned long long *t;
-	int cpu = get_cpu();
-
-	t = &per_cpu(blk_trace_cpu_offset, cpu);
-
-	/*
-	 * Just call it twice, hopefully the second call will be cache hot
-	 * and a little more precise
-	 */
-	blk_check_time(t);
-	blk_check_time(t);
-
-	put_cpu();
-}
-
-/*
- * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
- * timings
- */
-static void blk_trace_calibrate_offsets(void)
-{
-	unsigned long flags;
-
-	smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
-	local_irq_save(flags);
-	blk_trace_check_cpu_time(NULL);
-	local_irq_restore(flags);
-}
-
-static void blk_trace_set_ht_offsets(void)
-{
-#if defined(CONFIG_SCHED_SMT)
-	int cpu, i;
-
-	/*
-	 * now make sure HT siblings have the same time offset
-	 */
-	preempt_disable();
-	for_each_online_cpu(cpu) {
-		unsigned long long *cpu_off, *sibling_off;
-
-		for_each_cpu_mask(i, cpu_sibling_map[cpu]) {
-			if (i == cpu)
-				continue;
-
-			cpu_off = &per_cpu(blk_trace_cpu_offset, cpu);
-			sibling_off = &per_cpu(blk_trace_cpu_offset, i);
-			*sibling_off = *cpu_off;
-		}
-	}
-	preempt_enable();
-#endif
-}
-
 static __init int blk_trace_init(void)
 {
-	mutex_init(&blk_tree_mutex);
-	blk_trace_calibrate_offsets();
-	blk_trace_set_ht_offsets();
-
 	return 0;
 }
 
diff --git a/block/utt.c b/block/utt.c
new file mode 100644
index 0000000..a175999
--- /dev/null
+++ b/block/utt.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2006 Jens Axboe <axboe@suse.de>
+ *
+ * Moved to utt.c by Tom Zanussi, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include <linux/utt.h>
+
+DEFINE_PER_CPU(unsigned long long, utt_trace_cpu_offset) = { 0, };
+EXPORT_PER_CPU_SYMBOL(utt_trace_cpu_offset);
+
+static inline void utt_remove_root(struct utt_trace *utt)
+{
+	if (utt->utt_tree_root && simple_empty(utt->utt_tree_root)) {
+		debugfs_remove(utt->utt_tree_root);
+		utt->utt_tree_root = NULL;
+	}
+}
+
+static void utt_remove_tree(struct utt_trace *utt)
+{
+	mutex_lock(&utt->utt_tree_mutex);
+	debugfs_remove(utt->dir);
+	if (--utt->root_users == 0)
+		utt_remove_root(utt);
+	mutex_unlock(&utt->utt_tree_mutex);
+}
+
+static struct dentry *utt_create_tree(struct utt_trace *utt, const char *root,
+				      const char *name)
+{
+	struct dentry *dir = NULL;
+
+	if (root == NULL || name == NULL)
+		return NULL;
+	/* NOTE(review): utt_tree_root is per-utt, so never shared - intended? */
+	mutex_lock(&utt->utt_tree_mutex);
+
+	if (!utt->utt_tree_root) {
+		utt->utt_tree_root = debugfs_create_dir(root, NULL);
+		if (!utt->utt_tree_root)
+			goto err;
+	}
+
+	dir = debugfs_create_dir(name, utt->utt_tree_root);
+	if (dir)
+		utt->root_users++;
+	else
+		utt_remove_root(utt);
+
+err:
+	mutex_unlock(&utt->utt_tree_mutex);
+	return dir;
+}
+
+void utt_trace_cleanup(struct utt_trace *utt)
+{
+	relay_close(utt->rchan);
+	debugfs_remove(utt->dropped_file);
+	utt_remove_tree(utt);
+	free_percpu(utt->sequence);
+	kfree(utt);
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_cleanup);
+
+int utt_trace_remove(struct utt_trace *utt)
+{
+	if (utt->trace_state == Utt_trace_setup ||
+	    utt->trace_state == Utt_trace_stopped)
+		utt_trace_cleanup(utt);
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_remove);
+
+static int utt_dropped_open(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+
+	return 0;
+}
+
+static ssize_t utt_dropped_read(struct file *filp, char __user *buffer,
+				size_t count, loff_t *ppos)
+{
+	struct utt_trace *utt = filp->private_data;
+	char buf[16];
+
+	snprintf(buf, sizeof(buf), "%u\n", atomic_read(&utt->dropped));
+
+	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+static struct file_operations utt_dropped_fops = {
+	.owner =	THIS_MODULE,
+	.open =		utt_dropped_open,
+	.read =		utt_dropped_read,
+};
+
+/*
+ * Keep track of how many times we encountered a full subbuffer, to aid
+ * the user space app in telling how many lost events there were.
+ */
+static int utt_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
+				     void *prev_subbuf, size_t prev_padding)
+{
+	struct utt_trace *utt;
+
+	if (!relay_buf_full(buf))
+		return 1;
+
+	utt = buf->chan->private_data;
+	atomic_inc(&utt->dropped);
+	return 0;
+}
+
+static int utt_remove_buf_file_callback(struct dentry *dentry)
+{
+	debugfs_remove(dentry);
+	return 0;
+}
+
+static struct dentry *utt_create_buf_file_callback(const char *filename,
+						   struct dentry *parent,
+						   int mode,
+						   struct rchan_buf *buf,
+						   int *is_global)
+{
+	return debugfs_create_file(filename, mode, parent, buf,
+				   &relay_file_operations);
+}
+
+static struct rchan_callbacks utt_relay_callbacks = {
+	.subbuf_start		= utt_subbuf_start_callback,
+	.create_buf_file	= utt_create_buf_file_callback,
+	.remove_buf_file	= utt_remove_buf_file_callback,
+};
+
+/*
+ * Set up a trace: returns it, or NULL with the error code in utts->err
+ */
+struct utt_trace *utt_trace_setup(struct utt_trace_setup *utts)
+{
+	struct utt_trace *utt = NULL;
+	struct dentry *dir = NULL;
+	int ret = -EINVAL;
+
+	if (!utts->buf_size || !utts->buf_nr)
+		goto err;
+
+	ret = -ENOMEM;
+	utt = kzalloc(sizeof(*utt), GFP_KERNEL);
+	if (!utt)
+		goto err;
+
+	mutex_init(&utt->utt_tree_mutex);
+
+	utt->sequence = alloc_percpu(unsigned long);
+	if (!utt->sequence)
+		goto err;
+
+	ret = -ENOENT;
+	dir = utt_create_tree(utt, utts->root, utts->name);
+	if (!dir)
+		goto err;
+
+	utt->dir = dir;
+	atomic_set(&utt->dropped, 0);
+
+	ret = -EIO;
+	utt->dropped_file = debugfs_create_file("dropped", 0444, dir, utt, &utt_dropped_fops);
+	if (!utt->dropped_file)
+		goto err;
+
+	utt->rchan = relay_open("trace", dir, utts->buf_size, utts->buf_nr, &utt_relay_callbacks);
+	if (!utt->rchan)
+		goto err;
+	utt->rchan->private_data = utt;
+
+	utt->trace_state = Utt_trace_setup;
+
+	utts->err = 0;
+	return utt;
+err:
+	if (utt) {
+		if (utt->dropped_file)
+			debugfs_remove(utt->dropped_file);
+		if (utt->sequence)
+			free_percpu(utt->sequence);
+		if (utt->rchan)
+			relay_close(utt->rchan);
+		if (dir)	/* uses utt's mutex and dir: must precede kfree() */
+			utt_remove_tree(utt);
+		kfree(utt);
+	}
+	utts->err = ret;
+	return NULL;
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_setup);
+
+int utt_trace_startstop(struct utt_trace *utt, int start,
+			unsigned int *trace_seq)
+{
+	int ret;
+
+	/*
+	 * For starting a trace, we can transition from a setup or stopped
+	 * trace. For stopping a trace, the state must be running
+	 */
+	ret = -EINVAL;
+	if (start) {
+		if (utt->trace_state == Utt_trace_setup ||
+		    utt->trace_state == Utt_trace_stopped) {
+			if (trace_seq)
+				(*trace_seq)++;
+			smp_mb();
+			utt->trace_state = Utt_trace_running;
+			ret = 0;
+		}
+	} else {
+		if (utt->trace_state == Utt_trace_running) {
+			utt->trace_state = Utt_trace_stopped;
+			relay_flush(utt->rchan);
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_startstop);
+
+/*
+ * Store in *t the wall-clock time (ns) from a gettimeofday() minus the
+ * average of two sched_clock() calls made around it; math is 64-bit
+ */
+static void utt_check_time(unsigned long long *t)
+{
+	unsigned long long a, b;
+	struct timeval tv;
+
+	a = sched_clock();
+	do_gettimeofday(&tv);
+	b = sched_clock();
+
+	*t = (unsigned long long)tv.tv_sec * 1000000000ULL + tv.tv_usec * 1000;
+	*t -= (a + b) / 2;
+}
+
+static void utt_check_cpu_time(void *data)
+{
+	unsigned long long *t;
+	int cpu = get_cpu();
+
+	t = &per_cpu(utt_trace_cpu_offset, cpu);
+
+	/*
+	 * Just call it twice, hopefully the second call will be cache hot
+	 * and a little more precise
+	 */
+	utt_check_time(t);
+	utt_check_time(t);
+
+	put_cpu();
+}
+
+/*
+ * Call utt_check_cpu_time() on each CPU to calibrate our inter-CPU
+ * timings
+ */
+static void utt_calibrate_offsets(void)
+{
+	unsigned long flags;
+
+	smp_call_function(utt_check_cpu_time, NULL, 1, 1);
+	local_irq_save(flags);
+	utt_check_cpu_time(NULL);
+	local_irq_restore(flags);
+}
+
+static void utt_set_ht_offsets(void)
+{
+#if defined(CONFIG_SCHED_SMT)
+	int cpu, i;
+
+	/*
+	 * now make sure HT siblings have the same time offset
+	 */
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		unsigned long long *cpu_off, *sibling_off;
+
+		for_each_cpu_mask(i, cpu_sibling_map[cpu]) {
+			if (i == cpu)
+				continue;
+
+			cpu_off = &per_cpu(utt_trace_cpu_offset, cpu);
+			sibling_off = &per_cpu(utt_trace_cpu_offset, i);
+			*sibling_off = *cpu_off;
+		}
+	}
+	preempt_enable();
+#endif
+}
+
+static __init int utt_init(void)
+{
+	utt_calibrate_offsets();
+	utt_set_ht_offsets();
+
+	return 0;
+}
+
+module_init(utt_init);
+
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7520cc1..d41bacc 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -2,7 +2,7 @@
 #define BLKTRACE_H
 
 #include <linux/blkdev.h>
-#include <linux/relay.h>
+#include <linux/utt.h>
 
 /*
  * Trace categories
@@ -96,34 +96,21 @@ struct blk_io_trace_remap {
 	__be64 sector;
 };
 
-enum {
-	Blktrace_setup = 1,
-	Blktrace_running,
-	Blktrace_stopped,
-};
-
 struct blk_trace {
-	int trace_state;
-	struct rchan *rchan;
-	unsigned long *sequence;
+	struct utt_trace *utt;
 	u16 act_mask;
 	u64 start_lba;
 	u64 end_lba;
 	u32 pid;
 	u32 dev;
-	struct dentry *dir;
-	struct dentry *dropped_file;
-	atomic_t dropped;
 };
 
 /*
  * User setup structure passed with BLKTRACESTART
  */
 struct blk_user_trace_setup {
-	char name[BDEVNAME_SIZE];	/* output */
+	struct utt_trace_setup utts;
 	u16 act_mask;			/* input */
-	u32 buf_size;			/* input */
-	u32 buf_nr;			/* input */
 	u64 start_lba;
 	u64 end_lba;
 	u32 pid;
diff --git a/include/linux/utt.h b/include/linux/utt.h
new file mode 100644
index 0000000..ee95461
--- /dev/null
+++ b/include/linux/utt.h
@@ -0,0 +1,53 @@
+#ifndef UTT_H
+#define UTT_H
+
+#include <linux/relay.h>
+
+enum {
+	Utt_trace_setup = 1,
+	Utt_trace_running,
+	Utt_trace_stopped,
+};
+
+struct utt_trace {
+	int trace_state;
+	struct rchan *rchan;
+	unsigned long *sequence;
+	struct dentry *dir;
+	struct dentry *dropped_file;
+	atomic_t dropped;
+	struct dentry *utt_tree_root;
+	struct mutex utt_tree_mutex;
+	unsigned int root_users;
+	void *private_data;
+};
+
+#define UTT_TRACE_ROOT_NAME_SIZE	32	/* Largest string for a root dir identifier */
+#define UTT_TRACE_NAME_SIZE		32	/* Largest string for a trace identifier */
+
+/*
+ * User setup structure
+ */
+struct utt_trace_setup {
+	char root[UTT_TRACE_ROOT_NAME_SIZE];	/* input */
+	char name[UTT_TRACE_NAME_SIZE];		/* input */
+	u32 buf_size;				/* input */
+	u32 buf_nr;				/* input */
+	int err;				/* output */
+};
+
+#if defined(CONFIG_UTT)
+DECLARE_PER_CPU(unsigned long long, utt_trace_cpu_offset);
+extern struct utt_trace *utt_trace_setup(struct utt_trace_setup *utts);
+extern int utt_trace_startstop(struct utt_trace *utt, int start,
+			       unsigned int *trace_seq);
+extern void utt_trace_cleanup(struct utt_trace *utt);
+extern int utt_trace_remove(struct utt_trace *utt);
+#else /* !CONFIG_UTT */
+#define utt_trace_setup(utts)		(NULL)
+#define utt_trace_startstop(utt, start, trace_seq)	(-EINVAL)
+#define utt_trace_cleanup(utt)		do { } while (0)
+#define utt_trace_remove(utt)		(-EINVAL)
+#endif /* CONFIG_UTT */
+
+#endif



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]