[SYSTEMTAP/PATCH 2/4] rt: replace read/write lock with RCU


I noticed that in -rt mode, read_lock and write_lock lead to several BUG_ONs:

[ 1033.544821]  ffff880035463d38 ffffffff815fcaed ffff880035463d90 ffffffff816057f7
[ 1033.544826]  ffff880035463fd8 0000000000017e80 0000000000017e80 ffffffff810be5fb
[ 1033.544826] Call Trace:
[ 1033.544830]  [<ffffffff81602329>] dump_stack+0x4e/0x7a
[ 1033.544834]  [<ffffffff815fcaed>] __schedule_bug+0x9f/0xad
[ 1033.544838]  [<ffffffff816057f7>] __schedule+0x627/0x6a0
[ 1033.544842]  [<ffffffff810be5fb>] ? task_blocks_on_rt_mutex+0x19b/0x220
[ 1033.544846]  [<ffffffff816058a0>] schedule+0x30/0xa0
[ 1033.544850]  [<ffffffff8160727d>] rt_spin_lock_slowlock+0xbd/0x1f0
[ 1033.544856]  [<ffffffff81607df5>] __rt_spin_lock+0x25/0x30
[ 1033.544858]  [<ffffffff816080b0>] rt_read_lock+0x30/0x40
[ 1033.544861]  [<ffffffff816080ce>] rt_read_lock_irqsave+0xe/0x20
[ 1033.544867]  [<ffffffffa08a9b89>] __stp_tf_get_map_entry+0x19/0xc0 [stap_e40dcb2c46d7c0a2fb8d70ba343e393a_15235]
[ 1033.544873]  [<ffffffffa08afedd>] __stp_utrace_task_finder_target_syscall_exit+0x5d/0x350 [stap_e40dcb2c46d7c0a2fb8d70ba343e393a_15235]
[ 1033.544889]  [<ffffffffa08a91c5>] utrace_report_syscall_exit+0xc5/0x110 [stap_e40dcb2c46d7c0a2fb8d70ba343e393a_15235]
[ 1033.544893]  [<ffffffff81023ce0>] syscall_trace_leave+0x100/0x130
[ 1033.544896]  [<ffffffff8161090b>] int_check_syscall_exit_work+0x34/0x3d
[ 1033.544902] CPU: 5 PID: 20766 Comm: make Tainted: GF       W  O 3.14.12-rt-rt9+ #2
[ 1033.544904] Hardware name: Dell Inc. PowerEdge T620/0658N7, BIOS 1.4.6 10/26/2012
[ 1033.544912]  ffff880236f57e80 ffff8801cab21d28 ffffffff81602329 ffff8802243064a0
[ 1033.544919]  ffff8801cab21d38 ffffffff815fcaed ffff8801cab21d90 ffffffff816057f7
[ 1033.544924]  ffff8801cab21fd8 0000000000017e80 0000000000017e80 ffffffff810be5fb
[ 1033.544925] Call Trace:
[ 1033.544929]  [<ffffffff81602329>] dump_stack+0x4e/0x7a
[ 1033.544934]  [<ffffffff815fcaed>] __schedule_bug+0x9f/0xad
[ 1033.544938]  [<ffffffff816057f7>] __schedule+0x627/0x6a0
[ 1033.544942]  [<ffffffff810be5fb>] ? task_blocks_on_rt_mutex+0x19b/0x220
[ 1033.544946]  [<ffffffff816058a0>] schedule+0x30/0xa0
[ 1033.544951]  [<ffffffff8160727d>] rt_spin_lock_slowlock+0xbd/0x1f0
[ 1033.544956]  [<ffffffff81607df5>] __rt_spin_lock+0x25/0x30
[ 1033.544959]  [<ffffffff816080b0>] rt_read_lock+0x30/0x40
[ 1033.544962]  [<ffffffff816080ce>] rt_read_lock_irqsave+0xe/0x20
[ 1033.544968]  [<ffffffffa08a9b89>] __stp_tf_get_map_entry+0x19/0xc0 [stap_e40dcb2c46d7c0a2fb8d70ba343e393a_15235]
[ 1033.544975]  [<ffffffffa08afedd>] __stp_utrace_task_finder_target_syscall_exit+0x5d/0x350 [stap_e40dcb2c46d7c0a2fb8d70ba343e393a_15235]
[ 1033.544981]  [<ffffffffa08a91c5>] utrace_report_syscall_exit+0xc5/0x110 [stap_e40dcb2c46d7c0a2fb8d70ba343e393a_15235]
[ 1033.544984]  [<ffffffff81023ce0>] syscall_trace_leave+0x100/0x130
[ 1033.544988]  [<ffffffff8161090b>] int_check_syscall_exit_work+0x34/0x3d
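
Under PREEMPT_RT, rwlock_t is backed by an rtmutex, so rt_read_lock() can
end up calling schedule(); doing that with interrupts disabled in the
syscall-exit tracing path is what produces the "scheduling while atomic"
splats above. A minimal sketch of the pre-patch pattern (illustrative,
simplified from __stp_tf_get_map_entry()):

	static DEFINE_RWLOCK(__stp_tf_map_lock);

	unsigned long flags;
	/* On -rt this rwlock is rtmutex-based, so the acquire below
	 * may sleep -- invalid in this atomic tracing context. */
	read_lock_irqsave(&__stp_tf_map_lock, flags);
	/* ... walk the pid hash chain ... */
	read_unlock_irqrestore(&__stp_tf_map_lock, flags);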

Replacing read_lock_irqsave() with an RCU read-side critical section
suppresses these BUG_ONs in -rt mode. The writer lock is likewise replaced
with a raw spinlock, which remains a true spinning lock on -rt kernels and
behaves as a normal spinlock on non-rt kernels. The hlist API calls are
also switched to their _rcu variants.
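
The conversion pattern, as a minimal sketch (simplified, hypothetical
names; the lookup() helper stands in for the real hash-chain loops in
task_finder_map.c and task_finder_vma.c):

	struct map_entry {
		struct hlist_node hlist;
		pid_t pid;
		/* ... payload ... */
	};

	static DEFINE_RAW_SPINLOCK(map_lock);

	/* Read side: RCU critical section instead of the rwlock; the
	 * explicit local_irq_save() keeps the old irq-safe semantics. */
	static struct map_entry *get_entry(struct task_struct *tsk)
	{
		struct map_entry *entry;
		unsigned long flags;

		rcu_read_lock();
		local_irq_save(flags);
		entry = lookup(tsk);	/* hypothetical: hash walk via
					   hlist_for_each_entry_rcu() */
		local_irq_restore(flags);
		rcu_read_unlock();
		return entry;
	}

	/* Write side: raw spinlock, a real spinning lock on -rt and
	 * equivalent to a normal spinlock on non-rt kernels. */
	static void add_entry(struct map_entry *entry,
			      struct hlist_head *head)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&map_lock, flags);
		hlist_add_head_rcu(&entry->hlist, head);
		raw_spin_unlock_irqrestore(&map_lock, flags);
	}

One general caveat with this pattern: memory unlinked with hlist_del_rcu()
is normally only safe to reuse or free after an RCU grace period
(synchronize_rcu() or call_rcu()), since concurrent readers may still be
traversing it.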

Signed-off-by: Santosh Shukla <sshukla@mvista.com>
---
 runtime/linux/addr-map.c        |   16 ++++++++-------
 runtime/linux/runtime.h         |    2 +-
 runtime/linux/task_finder_map.c |   38 ++++++++++++++++++----------------
 runtime/task_finder_vma.c       |   43 ++++++++++++++++++++++-----------------
 4 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/runtime/linux/addr-map.c b/runtime/linux/addr-map.c
index 3f5aca7..679ea7d 100644
--- a/runtime/linux/addr-map.c
+++ b/runtime/linux/addr-map.c
@@ -28,7 +28,7 @@ struct addr_map
   struct addr_map_entry entries[0];
 };
 
-static DEFINE_RWLOCK(addr_map_lock);
+static DEFINE_RAW_SPINLOCK(addr_map_lock);
 static struct addr_map* blackmap;
 
 /* Find address of entry where we can insert a new one. */
@@ -127,9 +127,11 @@ lookup_bad_addr(unsigned long addr, size_t size)
 #endif
 
   /* Search for the given range in the black-listed map.  */
-  read_lock_irqsave(&addr_map_lock, flags);
+  rcu_read_lock();
+  local_irq_save(flags);
   result = lookup_addr_aux(addr, size, blackmap);
-  read_unlock_irqrestore(&addr_map_lock, flags);
+  local_irq_restore(flags);
+  rcu_read_unlock();
   if (result)
     return 1;
   else
@@ -154,7 +156,7 @@ add_bad_addr_entry(unsigned long min_addr, unsigned long max_addr,
   while (1)
     {
       size_t old_size = 0;
-      write_lock_irqsave(&addr_map_lock, flags);
+      raw_spin_lock_irqsave(&addr_map_lock, flags);
       old_map = blackmap;
       if (old_map)
         old_size = old_map->size;
@@ -163,7 +165,7 @@ add_bad_addr_entry(unsigned long min_addr, unsigned long max_addr,
          added an entry while we were sleeping. */
       if (!new_map || (new_map && new_map->size < old_size + 1))
         {
-          write_unlock_irqrestore(&addr_map_lock, flags);
+          raw_spin_unlock_irqrestore(&addr_map_lock, flags);
           if (new_map)
             {
 	      _stp_kfree(new_map);
@@ -192,7 +194,7 @@ add_bad_addr_entry(unsigned long min_addr, unsigned long max_addr,
             *existing_min = min_entry;
           if (existing_max)
             *existing_max = max_entry;
-          write_unlock_irqrestore(&addr_map_lock, flags);
+          raw_spin_unlock_irqrestore(&addr_map_lock, flags);
           _stp_kfree(new_map);
           return 1;
         }
@@ -210,7 +212,7 @@ add_bad_addr_entry(unsigned long min_addr, unsigned long max_addr,
                (old_map->size - existing) * sizeof(*new_entry));
     }
   blackmap = new_map;
-  write_unlock_irqrestore(&addr_map_lock, flags);
+  raw_spin_unlock_irqrestore(&addr_map_lock, flags);
   if (old_map)
     _stp_kfree(old_map);
   return 0;
diff --git a/runtime/linux/runtime.h b/runtime/linux/runtime.h
index 76dbea4..0267808 100644
--- a/runtime/linux/runtime.h
+++ b/runtime/linux/runtime.h
@@ -74,7 +74,7 @@ static void _stp_exit(void);
 
 
 #ifdef STAPCONF_HLIST_4ARGS
-#define stap_hlist_for_each_entry(a,b,c,d) hlist_for_each_entry(a,b,c,d)
+#define stap_hlist_for_each_entry(a,b,c,d) hlist_for_each_entry_rcu(a,b,c,d)
 #define stap_hlist_for_each_entry_safe(a,b,c,d,e) hlist_for_each_entry_safe(a,b,c,d,e)
 #else
 #define stap_hlist_for_each_entry(a,b,c,d) (void) b; hlist_for_each_entry(a,c,d)
diff --git a/runtime/linux/task_finder_map.c b/runtime/linux/task_finder_map.c
index d515e9f..0256393 100644
--- a/runtime/linux/task_finder_map.c
+++ b/runtime/linux/task_finder_map.c
@@ -13,7 +13,7 @@
 // contents in interrupt context (which should only ever call 
 // stap_find_map_map_info for getting stored info). So we might
 // want to look into that if this seems a bottleneck.
-static DEFINE_RWLOCK(__stp_tf_map_lock);
+static DEFINE_RAW_SPINLOCK(__stp_tf_map_lock);
 
 #define __STP_TF_HASH_BITS 4
 #define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS)
@@ -51,11 +51,11 @@ __stp_tf_map_initialize(void)
 	struct hlist_head *head = &__stp_tf_map_free_list[0];
 
 	unsigned long flags;
-	write_lock_irqsave(&__stp_tf_map_lock, flags);
+	raw_spin_lock_irqsave(&__stp_tf_map_lock, flags);
 	for (i = 0; i < TASK_FINDER_MAP_ENTRY_ITEMS; i++) {
-		hlist_add_head(&__stp_tf_map_free_list_items[i].hlist, head);
+		hlist_add_head_rcu(&__stp_tf_map_free_list_items[i].hlist, head);
 	}
-	write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+	raw_spin_unlock_irqrestore(&__stp_tf_map_lock, flags);
 }
 
 
@@ -75,7 +75,7 @@ __stp_tf_map_get_free_entry(void)
 		break;
 	}
 	if (entry != NULL)
-		hlist_del(&entry->hlist);
+		hlist_del_rcu(&entry->hlist);
 	return entry;
 }
 
@@ -87,7 +87,7 @@ static void
 __stp_tf_map_put_free_entry(struct __stp_tf_map_entry *entry)
 {
 	struct hlist_head *head = &__stp_tf_map_free_list[0];
-	hlist_add_head(&entry->hlist, head);
+	hlist_add_head_rcu(&entry->hlist, head);
 }
 
 
@@ -109,15 +109,19 @@ __stp_tf_get_map_entry(struct task_struct *tsk)
 	struct __stp_tf_map_entry *entry;
 
 	unsigned long flags;
-	read_lock_irqsave(&__stp_tf_map_lock, flags);
+	rcu_read_lock();
+	local_irq_save(flags);
 	head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
 	stap_hlist_for_each_entry(entry, node, head, hlist) {
 		if (tsk->pid == entry->pid) {
-			read_unlock_irqrestore(&__stp_tf_map_lock, flags);
+			local_irq_restore(flags);
+			rcu_read_unlock();
 			return entry;
 		}
 	}
-	read_unlock_irqrestore(&__stp_tf_map_lock, flags);
+	local_irq_restore(flags);
+	rcu_read_unlock();
+
 	return NULL;
 }
 
@@ -133,14 +137,14 @@ __stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0,
 	struct __stp_tf_map_entry *entry;
 	unsigned long flags;
 
-	write_lock_irqsave(&__stp_tf_map_lock, flags);
+	raw_spin_lock_irqsave(&__stp_tf_map_lock, flags);
 	head = &__stp_tf_map_table[__stp_tf_map_hash(tsk)];
 	stap_hlist_for_each_entry(entry, node, head, hlist) {
 		// If we find an existing entry, just increment the
 		// usage count.
 		if (tsk->pid == entry->pid) {
 			entry->usage++;
-			write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+			raw_spin_unlock_irqrestore(&__stp_tf_map_lock, flags);
 			return 0;
 		}
 	}
@@ -148,7 +152,7 @@ __stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0,
 	// Get an element from the free list.
 	entry = __stp_tf_map_get_free_entry();
 	if (!entry) {
-		write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+		raw_spin_unlock_irqrestore(&__stp_tf_map_lock, flags);
 		return -ENOMEM;
 	}
 	entry->usage = 1;
@@ -157,8 +161,8 @@ __stp_tf_add_map(struct task_struct *tsk, long syscall_no, unsigned long arg0,
 	entry->arg0 = arg0;
 	entry->arg1 = arg1;
 	entry->arg2 = arg2;
-	hlist_add_head(&entry->hlist, head);
-	write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+	hlist_add_head_rcu(&entry->hlist, head);
+	raw_spin_unlock_irqrestore(&__stp_tf_map_lock, flags);
 	return 0;
 }
 
@@ -174,7 +178,7 @@ __stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry)
 
 	if (entry != NULL) {
 		unsigned long flags;
-		write_lock_irqsave(&__stp_tf_map_lock, flags);
+		raw_spin_lock_irqsave(&__stp_tf_map_lock, flags);
 
 		// Decrement the usage count.
 		entry->usage--;
@@ -182,10 +186,10 @@ __stp_tf_remove_map_entry(struct __stp_tf_map_entry *entry)
 		// If the entry is unused, put it back on the free
 		// list.
 		if (entry->usage == 0) {
-			hlist_del(&entry->hlist);
+			hlist_del_rcu(&entry->hlist);
 			__stp_tf_map_put_free_entry(entry);
 		}
-		write_unlock_irqrestore(&__stp_tf_map_lock, flags);
+		raw_spin_unlock_irqrestore(&__stp_tf_map_lock, flags);
 	}
 	return 0;
 }
diff --git a/runtime/task_finder_vma.c b/runtime/task_finder_vma.c
index f826982..ff7f2f3 100644
--- a/runtime/task_finder_vma.c
+++ b/runtime/task_finder_vma.c
@@ -15,7 +15,7 @@
 // contents in interrupt context (which should only ever call 
 // stap_find_vma_map_info for getting stored vma info). So we might
 // want to look into that if this seems a bottleneck.
-static DEFINE_RWLOCK(__stp_tf_vma_lock);
+static DEFINE_RAW_SPINLOCK(__stp_tf_vma_lock);
 
 #define __STP_TF_HASH_BITS 4
 #define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS)
@@ -103,7 +103,7 @@ stap_destroy_vma_map(void)
 				continue;
 
 		        stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) {
-				hlist_del(&entry->hlist);
+				hlist_del_rcu(&entry->hlist);
 				__stp_tf_vma_release_entry(entry);
 			}
 		}
@@ -180,17 +180,17 @@ stap_add_vma_map_info(struct task_struct *tsk,
 	// Take a write lock, since we are most likely going to write
 	// after reading. But reserve a new entry first outside the lock.
 	new_entry = __stp_tf_vma_new_entry();
-	write_lock_irqsave(&__stp_tf_vma_lock, flags);
+	raw_spin_lock_irqsave(&__stp_tf_vma_lock, flags);
 	entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
 	if (entry != NULL) {
-		write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+		raw_spin_unlock_irqrestore(&__stp_tf_vma_lock, flags);
 		if (new_entry)
 			__stp_tf_vma_release_entry(new_entry);
 		return -EBUSY;	/* Already there */
 	}
 
 	if (!new_entry) {
-		write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+		raw_spin_unlock_irqrestore(&__stp_tf_vma_lock, flags);
 		return -ENOMEM;
 	}
 
@@ -212,8 +212,8 @@ stap_add_vma_map_info(struct task_struct *tsk,
 	entry->user = user;
 
 	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
-	hlist_add_head(&entry->hlist, head);
-	write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+	hlist_add_head_rcu(&entry->hlist, head);
+	raw_spin_unlock_irqrestore(&__stp_tf_vma_lock, flags);
 	return 0;
 }
 
@@ -234,13 +234,13 @@ stap_extend_vma_map_info(struct task_struct *tsk,
 
 	// Take a write lock, since we are most likely going to write
 	// to the entry after reading, if its vm_end matches our vm_start.
-	write_lock_irqsave(&__stp_tf_vma_lock, flags);
+	raw_spin_lock_irqsave(&__stp_tf_vma_lock, flags);
 	entry = __stp_tf_get_vma_map_entry_end_internal(tsk, vm_start);
 	if (entry != NULL) {
 		entry->vm_end = vm_end;
 		res = 0;
 	}
-	write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+	raw_spin_unlock_irqrestore(&__stp_tf_vma_lock, flags);
 	return res;
 }
 
@@ -258,14 +258,14 @@ stap_remove_vma_map_info(struct task_struct *tsk, unsigned long vm_start)
 	// Take a write lock since we are most likely going to delete
 	// after reading.
 	unsigned long flags;
-	write_lock_irqsave(&__stp_tf_vma_lock, flags);
+	raw_spin_lock_irqsave(&__stp_tf_vma_lock, flags);
 	entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
 	if (entry != NULL) {
-		hlist_del(&entry->hlist);
+		hlist_del_rcu(&entry->hlist);
 		__stp_tf_vma_release_entry(entry);
                 rc = 0;
 	}
-	write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+	raw_spin_unlock_irqrestore(&__stp_tf_vma_lock, flags);
 	return rc;
 }
 
@@ -288,7 +288,8 @@ stap_find_vma_map_info(struct task_struct *tsk, unsigned long addr,
 	if (__stp_tf_vma_map == NULL)
 		return rc;
 
-	read_lock_irqsave(&__stp_tf_vma_lock, flags);
+	rcu_read_lock();
+	local_irq_save(flags);
 	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
 	stap_hlist_for_each_entry(entry, node, head, hlist) {
 		if (tsk->pid == entry->pid
@@ -309,7 +310,9 @@ stap_find_vma_map_info(struct task_struct *tsk, unsigned long addr,
 			*user = found_entry->user;
 		rc = 0;
 	}
-	read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+	local_irq_restore(flags);
+	rcu_read_unlock();
+
 	return rc;
 }
 
@@ -332,7 +335,8 @@ stap_find_vma_map_info_user(struct task_struct *tsk, void *user,
 	if (__stp_tf_vma_map == NULL)
 		return rc;
 
-	read_lock_irqsave(&__stp_tf_vma_lock, flags);
+	rcu_read_lock();
+	local_irq_save(flags);
 	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
 	stap_hlist_for_each_entry(entry, node, head, hlist) {
 		if (tsk->pid == entry->pid
@@ -350,7 +354,8 @@ stap_find_vma_map_info_user(struct task_struct *tsk, void *user,
 			*path = found_entry->path;
 		rc = 0;
 	}
-	read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+	local_irq_restore(flags);
+	rcu_read_unlock();
 	return rc;
 }
 
@@ -363,15 +368,15 @@ stap_drop_vma_maps(struct task_struct *tsk)
 	struct __stp_tf_vma_entry *entry;
 
 	unsigned long flags;
-	write_lock_irqsave(&__stp_tf_vma_lock, flags);
+	raw_spin_lock_irqsave(&__stp_tf_vma_lock, flags);
 	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
         stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) {
             if (tsk->pid == entry->pid) {
-		    hlist_del(&entry->hlist);
+		    hlist_del_rcu(&entry->hlist);
 		    __stp_tf_vma_release_entry(entry);
             }
         }
-	write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
+	raw_spin_unlock_irqrestore(&__stp_tf_vma_lock, flags);
 	return 0;
 }
 
-- 
1.7.9.5

