All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Yan, Zheng" <zheng.z.yan@intel.com>
To: ceph-devel@vger.kernel.org
Cc: greg@inktank.com, elder@inktank.com, "Yan,
	Zheng" <zheng.z.yan@intel.com>
Subject: [PATCH 2/4] ceph: take i_mutex before getting Fw cap
Date: Fri, 12 Apr 2013 16:11:10 +0800	[thread overview]
Message-ID: <1365754273-14088-3-git-send-email-zheng.z.yan@intel.com> (raw)
In-Reply-To: <1365754273-14088-1-git-send-email-zheng.z.yan@intel.com>

From: "Yan, Zheng" <zheng.z.yan@intel.com>

There is deadlock as illustrated bellow. The fix is taking i_mutex
before getting Fw cap reference.

      write                    truncate                 MDS
---------------------     --------------------      --------------
get Fw cap
                          lock i_mutex
lock i_mutex (blocked)
                          request setattr.size  ->
                                                <-   revoke Fw cap

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c | 13 +++++++------
 fs/ceph/file.c | 12 ++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0da2e94..8737572 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2058,6 +2058,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 		goto out;
 	}
 
+	/* finish pending truncate */
+	while (ci->i_truncate_pending) {
+		spin_unlock(&ci->i_ceph_lock);
+		__ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR));
+		spin_lock(&ci->i_ceph_lock);
+	}
+
 	if (need & CEPH_CAP_FILE_WR) {
 		if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
 			dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
@@ -2079,12 +2086,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 	}
 	have = __ceph_caps_issued(ci, &implemented);
 
-	/*
-	 * disallow writes while a truncate is pending
-	 */
-	if (ci->i_truncate_pending)
-		have &= ~CEPH_CAP_FILE_WR;
-
 	if ((have & need) == need) {
 		/*
 		 * Look at (implemented & ~have & not) so that we keep waiting
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 546a705..5490598 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -647,7 +647,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)len, inode);
 again:
-	__ceph_do_pending_vmtruncate(inode, true);
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
 	else
@@ -724,7 +723,7 @@ retry_snap:
 		ret = -ENOSPC;
 		goto out;
 	}
-	__ceph_do_pending_vmtruncate(inode, true);
+	mutex_lock(&inode->i_mutex);
 	dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
 	     inode->i_size);
@@ -733,8 +732,10 @@ retry_snap:
 	else
 		want = CEPH_CAP_FILE_BUFFER;
 	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
-	if (ret < 0)
-		goto out_put;
+	if (ret < 0) {
+		mutex_unlock(&inode->i_mutex);
+		goto out;
+	}
 
 	dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
@@ -744,10 +745,10 @@ retry_snap:
 	    (iocb->ki_filp->f_flags & O_DIRECT) ||
 	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
 	    (fi->flags & CEPH_F_SYNC)) {
+		mutex_unlock(&inode->i_mutex);
 		ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
 			&iocb->ki_pos);
 	} else {
-		mutex_lock(&inode->i_mutex);
 		ret = __generic_file_aio_write(iocb, iov, nr_segs,
 					       &iocb->ki_pos);
 		mutex_unlock(&inode->i_mutex);
@@ -762,7 +763,6 @@ retry_snap:
 			__mark_inode_dirty(inode, dirty);
 	}
 
-out_put:
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
 	     ceph_cap_string(got));
-- 
1.7.11.7


  parent reply	other threads:[~2013-04-12  8:11 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-12  8:11 [PATCH 1/4] ceph: add osd request to inode unsafe list in advance Yan, Zheng
2013-04-12  8:11 ` [PATCH 1/2] mds: pass proper mask to CInode::get_caps_issued Yan, Zheng
2013-04-17 21:58   ` Gregory Farnum
2013-04-12  8:11 ` Yan, Zheng [this message]
2013-04-15 16:15   ` [PATCH 2/4] ceph: take i_mutex before getting Fw cap Alex Elder
2013-04-17  3:42   ` Sage Weil
2013-04-17 12:14     ` Yan, Zheng
2013-04-17 15:28       ` Sage Weil
2013-04-12  8:11 ` [PATCH 2/2] mds: change XLOCK/XLOCKDONE's next state to LOCK Yan, Zheng
2013-04-12  8:11 ` [PATCH 3/4] ceph: fix symlink inode operations Yan, Zheng
2013-04-15 16:15   ` Alex Elder
2013-04-12  8:11 ` [PATCH 4/4] ceph: apply write checks in ceph_aio_write Yan, Zheng
2013-04-15 16:15   ` Alex Elder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1365754273-14088-3-git-send-email-zheng.z.yan@intel.com \
    --to=zheng.z.yan@intel.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=elder@inktank.com \
    --cc=greg@inktank.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.