All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xiubo Li <xiubli@redhat.com>
To: jlayton@kernel.org
Cc: idryomov@gmail.com, vshankar@redhat.com,
	ceph-devel@vger.kernel.org, Xiubo Li <xiubli@redhat.com>
Subject: [PATCH] ceph: try to queue a writeback if revoking fails
Date: Thu, 28 Apr 2022 16:29:49 +0800	[thread overview]
Message-ID: <20220428082949.11841-1-xiubli@redhat.com> (raw)

If the pagecaches writeback just finished and the i_wrbuffer_ref
reaches zero it will try to trigger ceph_check_caps(). But if just
before ceph_check_caps() the i_wrbuffer_ref could be increased
again by mmap/cache write, then the Fwb revoke will fail.

We need to try to queue a writeback in this case instead of
triggering the writeback by BDI's delayed work per 5 seconds.

URL: https://tracker.ceph.com/issues/55377
URL: https://tracker.ceph.com/issues/46904
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c  | 44 +++++++++++++++++++++++++++++++++++---------
 fs/ceph/super.h |  7 +++++++
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 906c95d2a4ed..0c0c8f5ae3b3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1912,6 +1912,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	struct rb_node *p;
 	bool queue_invalidate = false;
 	bool tried_invalidate = false;
+	bool queue_writeback = false;
 
 	if (session)
 		ceph_get_mds_session(session);
@@ -2064,10 +2065,30 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		}
 
 		/* completed revocation? going down and there are no caps? */
-		if (revoking && (revoking & cap_used) == 0) {
-			dout("completed revocation of %s\n",
-			     ceph_cap_string(cap->implemented & ~cap->issued));
-			goto ack;
+		if (revoking) {
+			if ((revoking & cap_used) == 0) {
+				dout("completed revocation of %s\n",
+				      ceph_cap_string(cap->implemented & ~cap->issued));
+				goto ack;
+			}
+
+			/*
+			 * If the "i_wrbuffer_ref" was increased by mmap or generic
+			 * cache write just before the ceph_check_caps() is called,
+			 * the Fb capability revoking will fail this time. Then we
+			 * must wait for the BDI's delayed work to flush the dirty
+			 * pages and to release the "i_wrbuffer_ref", which will cost
+			 * at most 5 seconds. That means the MDS needs to wait at
+			 * most 5 seconds to finished the Fb capability's revocation.
+			 *
+			 * Let's queue a writeback for it.
+			 */
+			if ((ci->i_last_caps &
+			     (CEPH_CAP_FAKE_WRBUFFER | CEPH_CAP_FILE_BUFFER)) &&
+			    ci->i_wrbuffer_ref && S_ISREG(inode->i_mode) &&
+			    (revoking & CEPH_CAP_FILE_BUFFER)) {
+				queue_writeback = true;
+			}
 		}
 
 		/* want more caps from mds? */
@@ -2134,9 +2155,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		__cap_delay_requeue(mdsc, ci);
 	}
 
+	ci->i_last_caps = 0;
 	spin_unlock(&ci->i_ceph_lock);
 
 	ceph_put_mds_session(session);
+	if (queue_writeback)
+		ceph_queue_writeback(inode);
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
 }
@@ -3084,16 +3108,16 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 		--ci->i_pin_ref;
 	if (had & CEPH_CAP_FILE_RD)
 		if (--ci->i_rd_ref == 0)
-			last++;
+			last |= CEPH_CAP_FILE_RD;
 	if (had & CEPH_CAP_FILE_CACHE)
 		if (--ci->i_rdcache_ref == 0)
-			last++;
+			last |= CEPH_CAP_FILE_CACHE;
 	if (had & CEPH_CAP_FILE_EXCL)
 		if (--ci->i_fx_ref == 0)
-			last++;
+			last |= CEPH_CAP_FILE_EXCL;
 	if (had & CEPH_CAP_FILE_BUFFER) {
 		if (--ci->i_wb_ref == 0) {
-			last++;
+			last |= CEPH_CAP_FILE_BUFFER;
 			/* put the ref held by ceph_take_cap_refs() */
 			put++;
 			check_flushsnaps = true;
@@ -3103,7 +3127,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 	}
 	if (had & CEPH_CAP_FILE_WR) {
 		if (--ci->i_wr_ref == 0) {
-			last++;
+			last |= CEPH_CAP_FILE_WR;
 			check_flushsnaps = true;
 			if (ci->i_wrbuffer_ref_head == 0 &&
 			    ci->i_dirty_caps == 0 &&
@@ -3131,6 +3155,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 			flushsnaps = 1;
 		wake = 1;
 	}
+	ci->i_last_caps |= last;
 	spin_unlock(&ci->i_ceph_lock);
 
 	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
@@ -3193,6 +3218,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 	spin_lock(&ci->i_ceph_lock);
 	ci->i_wrbuffer_ref -= nr;
 	if (ci->i_wrbuffer_ref == 0) {
+		ci->i_last_caps |= CEPH_CAP_FAKE_WRBUFFER;
 		last = true;
 		put++;
 	}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 73db7f6021f3..f275a41649af 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -362,6 +362,13 @@ struct ceph_inode_info {
 	struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
 	unsigned i_dirty_caps, i_flushing_caps;     /* mask of dirtied fields */
 
+	/*
+	 * The capabilities whose references reach to 0, and the bit
+	 * (CEPH_CAP_BITS) is for i_wrbuffer_ref.
+	 */
+#define CEPH_CAP_FAKE_WRBUFFER (1 << CEPH_CAP_BITS)
+	unsigned i_last_caps;
+
 	/*
 	 * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty
 	 * is protected by the mdsc->cap_dirty_lock, but each individual item
-- 
2.36.0.rc1


             reply	other threads:[~2022-04-28  8:35 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-28  8:29 Xiubo Li [this message]
2022-04-28 11:10 ` [PATCH] ceph: try to queue a writeback if revoking fails Jeff Layton
2022-04-28 11:52   ` Xiubo Li
2022-04-28 12:48 Xiubo Li
2022-04-28 12:49 ` Xiubo Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220428082949.11841-1-xiubli@redhat.com \
    --to=xiubli@redhat.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=vshankar@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.