All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peng Tao <tao.peng@primarydata.com>
To: linux-nfs@vger.kernel.org
Cc: Trond Myklebust <trond.myklebust@primarydata.com>,
	Peng Tao <tao.peng@primarydata.com>
Subject: [PATCH] nfs: fix dio deadlock when O_DIRECT flag is flipped
Date: Wed, 14 Jan 2015 22:09:03 +0800	[thread overview]
Message-ID: <1421244543-32539-1-git-send-email-tao.peng@primarydata.com> (raw)

Running xfstest generic/036, we'll get following VM_BUG_ON in
nfs_direct_IO(). 036 toggles O_DIRECT flag while IO is going on.
So the VM_BUG_ON should not exist there. However, we have a deadlock
in the code path as well, because inode->i_mutex is taken when calling
into ->direct_IO. And nfs_file_direct_write() would grab inode->i_mutex
again.

Meanwhile, nfs_file_direct_write() does a lot of things that is already
done by vfs before ->direct_IO is called. So skip those duplicates. One
exception is i_size_write. vfs does not take i_lock when setting i_size.
But nfs appears to need i_lock when setting i_size.

Signed-off-by: Peng Tao <tao.peng@primarydata.com>
---
 fs/nfs/direct.c | 92 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 65 insertions(+), 27 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bfd9d49..9f88b13 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -105,6 +105,8 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
 static void nfs_direct_write_schedule_work(struct work_struct *work);
+static ssize_t nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos);
 
 static inline void get_dreq(struct nfs_direct_req *dreq)
 {
@@ -257,11 +259,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
 
 	return -EINVAL;
 #else
-	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
-
 	if (rw == READ)
 		return nfs_file_direct_read(iocb, iter, pos);
-	return nfs_file_direct_write(iocb, iter, pos);
+	return nfs_direct_write(iocb, iter, pos);
 #endif /* CONFIG_NFS_SWAP */
 }
 
@@ -919,6 +919,66 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	return 0;
 }
 
+static ssize_t _nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+				   struct inode *inode, struct nfs_direct_req **dreqp,
+				   size_t count, loff_t pos)
+{
+	ssize_t result;
+	struct nfs_direct_req *dreq;
+	struct nfs_lock_context *l_ctx;
+
+	task_io_account_write(count);
+
+	result = -ENOMEM;
+	dreq = nfs_direct_req_alloc();
+	if (!dreq)
+		goto out;
+
+	dreq->inode = inode;
+	dreq->bytes_left = count;
+	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+	l_ctx = nfs_get_lock_context(dreq->ctx);
+	if (IS_ERR(l_ctx)) {
+		result = PTR_ERR(l_ctx);
+		nfs_direct_req_release(dreq);
+		goto out;
+	}
+	dreq->l_ctx = l_ctx;
+	if (!is_sync_kiocb(iocb))
+		dreq->iocb = iocb;
+
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+	*dreqp = dreq;
+
+out:
+	return result;
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct nfs_direct_req *uninitialized_var(dreq);
+	size_t count = iov_iter_count(iter);
+	ssize_t result;
+
+	result = _nfs_direct_write(iocb, iter, inode, &dreq, count, pos);
+	if (!result) {
+		result = nfs_direct_wait(dreq);
+		if (result > 0) {
+			iocb->ki_pos = pos + result;
+			spin_lock(&inode->i_lock);
+			if (i_size_read(inode) < iocb->ki_pos)
+				i_size_write(inode, iocb->ki_pos);
+			spin_unlock(&inode->i_lock);
+		}
+	}
+	nfs_direct_req_release(dreq);
+	return result;
+}
+
 /**
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
@@ -947,8 +1007,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
-	struct nfs_direct_req *dreq;
-	struct nfs_lock_context *l_ctx;
+	struct nfs_direct_req *uninitialized_var(dreq);
 	loff_t end;
 	size_t count = iov_iter_count(iter);
 	end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -982,26 +1041,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 			goto out_unlock;
 	}
 
-	task_io_account_write(count);
-
-	result = -ENOMEM;
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		goto out_unlock;
-
-	dreq->inode = inode;
-	dreq->bytes_left = count;
-	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
-	l_ctx = nfs_get_lock_context(dreq->ctx);
-	if (IS_ERR(l_ctx)) {
-		result = PTR_ERR(l_ctx);
-		goto out_release;
-	}
-	dreq->l_ctx = l_ctx;
-	if (!is_sync_kiocb(iocb))
-		dreq->iocb = iocb;
-
-	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+	result = _nfs_direct_write(iocb, iter, inode, &dreq, count, pos);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
@@ -1025,8 +1065,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	nfs_direct_req_release(dreq);
 	return result;
 
-out_release:
-	nfs_direct_req_release(dreq);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
 out:
-- 
1.9.1


             reply	other threads:[~2015-01-14 14:09 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-14 14:09 Peng Tao [this message]
2015-01-14 15:48 ` [PATCH] nfs: fix dio deadlock when O_DIRECT flag is flipped Christoph Hellwig
     [not found]   ` <CAHQdGtQAqn9zh8UC-dn2s3nLmnRyyxeSNdk=+FZdYmj46ZEcXA@mail.gmail.com>
2015-01-14 16:21     ` Trond Myklebust
2015-01-14 16:55   ` Peng Tao
2015-01-15 15:17     ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1421244543-32539-1-git-send-email-tao.peng@primarydata.com \
    --to=tao.peng@primarydata.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@primarydata.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.