This is the mail archive of the cluster-cvs@sourceware.org mailing list for the cluster.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

cluster: STABLE2 - gfs-kmod: workaround for potential deadlock.Prefault user pages


Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=4787e11dc7831f42228b89ba7726fd6f6901a1e3
Commit:        4787e11dc7831f42228b89ba7726fd6f6901a1e3
Parent:        49885167a5b1d8a848ae6b747865d716026375e7
Author:        Benjamin Marzinski <bmarzins@redhat.com>
AuthorDate:    Fri Nov 21 13:53:08 2008 -0600
Committer:     Benjamin Marzinski <bmarzins@redhat.com>
CommitterDate: Fri Nov 21 13:53:08 2008 -0600

gfs-kmod: workaround for potential deadlock. Prefault user pages

The bug uncovered in 461770 does not seem fixable without a massive
change to how gfs works.  There is a lock ordering mismatch between
the process address space lock and the glocks. The only good way to
avoid this in all cases is to not hold the glock for so long, which
is what gfs2 does. This is impossible without completely changing
how gfs does locking.  Fortunately, this is only a problem when you
have multiple processes sharing an address space, and are doing IO
to a gfs file with a userspace buffer that's part of an mmapped gfs
file. In this case, prefaulting the buffer's pages immediately
before acquiring the glocks significantly shortens the window for
this deadlock. Closing the window any more causes a large
performance hit.
---
 gfs-kernel/src/gfs/ops_file.c |   86 ++++++++++++++++++++++++++---------------
 1 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/gfs-kernel/src/gfs/ops_file.c b/gfs-kernel/src/gfs/ops_file.c
index 13289b0..112c646 100644
--- a/gfs-kernel/src/gfs/ops_file.c
+++ b/gfs-kernel/src/gfs/ops_file.c
@@ -271,6 +271,37 @@ do_read_readi(struct file *file, char *buf, size_t size, loff_t *offset,
 }
 
 /**
+ * grope_mapping - feel up a mapping that needs to be written
+ * @buf: the start of the memory to be written
+ * @size: the size of the memory to be written
+ *
+ * We do this after acquiring the locks on the mapping,
+ * but before starting the write transaction.  We need to make
+ * sure that we don't cause recursive transactions if blocks
+ * need to be allocated to the file backing the mapping.
+ *
+ * Returns: errno
+ */
+
+static int
+grope_mapping(char *buf, size_t size)
+{
+	unsigned long start = (unsigned long)buf;
+	unsigned long stop = start + size;
+	char c;
+
+	while (start < stop) {
+		if (copy_from_user(&c, (char *)start, 1))
+			return -EFAULT;
+
+		start += PAGE_CACHE_SIZE;
+		start &= PAGE_CACHE_MASK;
+	}
+
+	return 0;
+}
+
+/**
  * do_read_direct - Read bytes from a file
  * @file: The file to read from
  * @buf: The buffer to copy into
@@ -306,6 +337,12 @@ do_read_direct(struct file *file, char *buf, size_t size, loff_t *offset,
 
 	gfs_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m(num_gh + 1, ghs);
 	if (error)
 		goto out;
@@ -369,6 +406,12 @@ do_read_buf(struct file *file, char *buf, size_t size, loff_t *offset,
 
 	gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m_atime(num_gh + 1, ghs);
 	if (error)
 		goto out;
@@ -439,37 +482,6 @@ gfs_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long count,
 }
 
 /**
- * grope_mapping - feel up a mapping that needs to be written
- * @buf: the start of the memory to be written
- * @size: the size of the memory to be written
- *
- * We do this after acquiring the locks on the mapping,
- * but before starting the write transaction.  We need to make
- * sure that we don't cause recursive transactions if blocks
- * need to be allocated to the file backing the mapping.
- *
- * Returns: errno
- */
-
-static int
-grope_mapping(char *buf, size_t size)
-{
-	unsigned long start = (unsigned long)buf;
-	unsigned long stop = start + size;
-	char c;
-
-	while (start < stop) {
-		if (copy_from_user(&c, (char *)start, 1))
-			return -EFAULT;
-
-		start += PAGE_CACHE_SIZE;
-		start &= PAGE_CACHE_MASK;
-	}
-
-	return 0;
-}
-
-/**
  * gfs_file_aio_write_nolock - Call vfs aio layer to write bytes to a file
  * @file: The file to write to
  * @buf: The buffer to copy from
@@ -671,6 +683,12 @@ do_write_direct(struct file *file, char *buf, size_t size, loff_t *offset,
  restart:
 	gfs_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m(num_gh + 1, ghs);
 	if (error)
 		goto out;
@@ -959,6 +977,12 @@ do_write_buf(struct file *file,
 
 	gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m(num_gh + 1, ghs);
 	if (error)
 		goto out;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]