public inbox for cluster-cvs@sourceware.org
help / color / mirror / Atom feed
* cluster: RHEL47 - gfs-kernel: workaround for bz479081. Prefault user pages.
@ 2009-01-06 22:52 Benjamin Marzinski
  0 siblings, 0 replies; only message in thread
From: Benjamin Marzinski @ 2009-01-06 22:52 UTC (permalink / raw)
  To: cluster-cvs-relay

Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=5449accf1f47b2a97f9f68699837c67540ae7ae7
Commit:        5449accf1f47b2a97f9f68699837c67540ae7ae7
Parent:        48a8896a0e35d0fbc7e275216ae809c8854f5b01
Author:        Benjamin Marzinski <bmarzins@redhat.com>
AuthorDate:    Mon Nov 17 17:39:05 2008 -0600
Committer:     Benjamin Marzinski <bmarzins@redhat.com>
CommitterDate: Tue Jan 6 16:43:50 2009 -0600

gfs-kernel: workaround for bz479081. Prefault user pages.

The bug uncovered in 479081 does not seem fixable without a massive
change to how gfs works.  There is a lock ordering mismatch between
the process address space lock and the glocks. The only good way to
avoid this in all cases is to not hold the glock for so long, which
is what gfs2 does. This is impossible without completely changing
how gfs does locking.  Fortunately, this is only a problem when you
have multiple processes sharing an address space, and are doing IO
to a gfs file with a userspace buffer that's part of an mmapped gfs
file. In this case, prefaulting the buffer's pages immediately
before acquiring the glocks significantly shortens the window for
this deadlock. Closing the window any more causes a large
performance hit.
---
 gfs-kernel/src/gfs/ops_file.c |   86 ++++++++++++++++++++++++++---------------
 1 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/gfs-kernel/src/gfs/ops_file.c b/gfs-kernel/src/gfs/ops_file.c
index d641758..65d64f8 100644
--- a/gfs-kernel/src/gfs/ops_file.c
+++ b/gfs-kernel/src/gfs/ops_file.c
@@ -283,6 +283,37 @@ do_read_readi(struct file *file, char *buf, size_t size, loff_t *offset,
 }
 
 /**
+ * grope_mapping - feel up a mapping that needs to be written
+ * @buf: the start of the memory to be written
+ * @size: the size of the memory to be written
+ *
+ * We do this after acquiring the locks on the mapping,
+ * but before starting the write transaction.  We need to make
+ * sure that we don't cause recursive transactions if blocks
+ * need to be allocated to the file backing the mapping.
+ *
+ * Returns: errno
+ */
+
+static int
+grope_mapping(char *buf, size_t size)
+{
+	unsigned long start = (unsigned long)buf;
+	unsigned long stop = start + size;
+	char c;
+
+	while (start < stop) {
+		if (copy_from_user(&c, (char *)start, 1))
+			return -EFAULT;
+
+		start += PAGE_CACHE_SIZE;
+		start &= PAGE_CACHE_MASK;
+	}
+
+	return 0;
+}
+
+/**
  * do_read_direct - Read bytes from a file
  * @file: The file to read from
  * @buf: The buffer to copy into
@@ -320,6 +351,12 @@ do_read_direct(struct file *file, char *buf, size_t size, loff_t *offset,
 
 	gfs_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m(num_gh + 1, ghs);
 	if (error)
 		goto out;
@@ -382,6 +419,12 @@ do_read_buf(struct file *file, char *buf, size_t size, loff_t *offset,
 
 	gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m_atime(num_gh + 1, ghs);
 	if (error)
 		goto out;
@@ -455,37 +498,6 @@ gfs_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 }
 
 /**
- * grope_mapping - feel up a mapping that needs to be written
- * @buf: the start of the memory to be written
- * @size: the size of the memory to be written
- *
- * We do this after acquiring the locks on the mapping,
- * but before starting the write transaction.  We need to make
- * sure that we don't cause recursive transactions if blocks
- * need to be allocated to the file backing the mapping.
- *
- * Returns: errno
- */
-
-static int
-grope_mapping(char *buf, size_t size)
-{
-	unsigned long start = (unsigned long)buf;
-	unsigned long stop = start + size;
-	char c;
-
-	while (start < stop) {
-		if (copy_from_user(&c, (char *)start, 1))
-			return -EFAULT;
-
-		start += PAGE_CACHE_SIZE;
-		start &= PAGE_CACHE_MASK;
-	}
-
-	return 0;
-}
-
-/**
  * do_write_direct_alloc - Write bytes to a file
  * @file: The file to write to
  * @buf: The buffer to copy from
@@ -663,6 +675,12 @@ do_write_direct(struct file *file, char *buf, size_t size, loff_t *offset,
 
 	gfs_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m(num_gh + 1, ghs);
 	if (error)
 		goto out;
@@ -968,6 +986,12 @@ do_write_buf(struct file *file,
 
 	gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
 
+	if (num_gh && atomic_read(&current->mm->mm_users) > 1) {
+		error = grope_mapping(buf, size);
+		if (error)
+			goto out;
+	}
+
 	error = gfs_glock_nq_m(num_gh + 1, ghs);
 	if (error)
 		goto out;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2009-01-06 22:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-06 22:52 cluster: RHEL47 - gfs-kernel: workaround for bz479081. Prefault user pages Benjamin Marzinski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).