ceph-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps
@ 2021-08-25 13:45 xiubli
  2021-08-25 13:45 ` [PATCH v3 1/3] " xiubli
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: xiubli @ 2021-08-25 13:45 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, ukernel, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

V3:
- fix one crash bug in the first patch.

V2:
- minor fixes to clean up the code from Jeff's comments, thanks
- swith to use lockdep_assert_held().



Test this for around 5 hours and this patch series worked well for me, my test script is:

$ while [ 1 ]; do date; for d in A B C; do (for i in {1..3}; do ./bin/mount.ceph :/ /mnt/kcephfs.$d -o noshare; rm -rf /mnt/kcephfs.$d/file$i.txt; rmdir /mnt/kcephfs.$d/.snap/snap$i; dd if=/dev/zero of=/mnt/kcephfs.$d/file$i.txt bs=1M count=8; mkdir -p /mnt/kcephfs.$d/.snap/snap$i; umount -fl /mnt/kcephfs.$d; done ) & done; wait; date; done



Xiubo Li (3):
  ceph: remove the capsnaps when removing the caps
  ceph: don't WARN if we're force umounting
  ceph: don't WARN if we're iterate removing the session caps

 fs/ceph/caps.c       | 106 ++++++++++++++++++++++++++++++++-----------
 fs/ceph/mds_client.c |  40 ++++++++++++++--
 fs/ceph/super.h      |   7 +++
 3 files changed, 123 insertions(+), 30 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-25 13:45 [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps xiubli
@ 2021-08-25 13:45 ` xiubli
  2021-08-25 14:25   ` Jeff Layton
  2021-08-25 13:45 ` [PATCH v3 2/3] ceph: don't WARN if we're force umounting xiubli
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: xiubli @ 2021-08-25 13:45 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, ukernel, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

The capsnaps will ihold the inodes when queuing to flush, so when
force umounting it will close the sessions first and if the MDSes
respond very fast and the session connections are closed just
before killing the superblock, which will flush the msgr queue,
then the flush capsnap callback won't ever be called, which will
lead the memory leak bug for the ceph_inode_info.

URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 67 +++++++++++++++++++++++++++++++++-----------
 fs/ceph/mds_client.c | 31 +++++++++++++++++++-
 fs/ceph/super.h      |  6 ++++
 3 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1e6261a16fb5..61326b490b2b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3162,7 +3162,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				break;
 			}
 		}
-		BUG_ON(!found);
+
+		/*
+		 * The capsnap should already be removed when
+		 * removing auth cap in case likes force unmount.
+		 */
+		BUG_ON(!found && ci->i_auth_cap);
+		if (!found)
+			goto unlock;
+
 		capsnap->dirty_pages -= nr;
 		if (capsnap->dirty_pages == 0) {
 			complete_capsnap = true;
@@ -3184,6 +3192,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 		     complete_capsnap ? " (complete capsnap)" : "");
 	}
 
+unlock:
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (last) {
@@ -3658,6 +3667,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 		iput(inode);
 }
 
+void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+			   bool *wake_ci, bool *wake_mdsc)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	bool ret;
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
+
+	list_del_init(&capsnap->ci_item);
+	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
+	if (wake_ci)
+		*wake_ci = ret;
+
+	spin_lock(&mdsc->cap_dirty_lock);
+	if (list_empty(&ci->i_cap_flush_list))
+		list_del_init(&ci->i_flushing_item);
+
+	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
+	if (wake_mdsc)
+		*wake_mdsc = ret;
+	spin_unlock(&mdsc->cap_dirty_lock);
+}
+
+void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+			 bool *wake_ci, bool *wake_mdsc)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
+	__ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
+}
+
 /*
  * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
  * throw away our cap_snap.
@@ -3695,23 +3741,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 			     capsnap, capsnap->follows);
 		}
 	}
-	if (flushed) {
-		WARN_ON(capsnap->dirty_pages || capsnap->writing);
-		dout(" removing %p cap_snap %p follows %lld\n",
-		     inode, capsnap, follows);
-		list_del(&capsnap->ci_item);
-		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
-
-		spin_lock(&mdsc->cap_dirty_lock);
-
-		if (list_empty(&ci->i_cap_flush_list))
-			list_del_init(&ci->i_flushing_item);
-
-		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
-							  &capsnap->cap_flush);
-		spin_unlock(&mdsc->cap_dirty_lock);
-	}
+	if (flushed)
+		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
 	spin_unlock(&ci->i_ceph_lock);
+
 	if (flushed) {
 		ceph_put_snap_context(capsnap->context);
 		ceph_put_cap_snap(capsnap);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index df3a735f7837..36ad0ebb2295 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1604,14 +1604,39 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
 	return ret;
 }
 
+static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_cap_snap *capsnap;
+	int capsnap_release = 0;
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
+
+	while (!list_empty(&ci->i_cap_snaps)) {
+		capsnap = list_first_entry(&ci->i_cap_snaps,
+					   struct ceph_cap_snap, ci_item);
+		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
+		ceph_put_snap_context(capsnap->context);
+		ceph_put_cap_snap(capsnap);
+		capsnap_release++;
+	}
+	wake_up_all(&ci->i_cap_wq);
+	wake_up_all(&mdsc->cap_flushing_wq);
+	return capsnap_release;
+}
+
 static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 				  void *arg)
 {
 	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	LIST_HEAD(to_remove);
 	bool dirty_dropped = false;
 	bool invalidate = false;
+	int capsnap_release = 0;
 
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
@@ -1619,7 +1644,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	__ceph_remove_cap(cap, false);
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
-		struct ceph_mds_client *mdsc = fsc->mdsc;
 
 		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 			if (inode->i_data.nrpages > 0)
@@ -1683,6 +1707,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
 			ci->i_prealloc_cap_flush = NULL;
 		}
+
+		if (!list_empty(&ci->i_cap_snaps))
+			capsnap_release = remove_capsnaps(mdsc, inode);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 	while (!list_empty(&to_remove)) {
@@ -1699,6 +1726,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		ceph_queue_invalidate(inode);
 	if (dirty_dropped)
 		iput(inode);
+	while (capsnap_release--)
+		iput(inode);
 	return 0;
 }
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 8f4f2747be65..445d13d760d1 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1169,6 +1169,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
 					    int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
+extern void __ceph_remove_capsnap(struct inode *inode,
+				  struct ceph_cap_snap *capsnap,
+				  bool *wake_ci, bool *wake_mdsc);
+extern void ceph_remove_capsnap(struct inode *inode,
+				struct ceph_cap_snap *capsnap,
+				bool *wake_ci, bool *wake_mdsc);
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
 			     struct ceph_mds_session **psession);
 extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 2/3] ceph: don't WARN if we're force umounting
  2021-08-25 13:45 [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps xiubli
  2021-08-25 13:45 ` [PATCH v3 1/3] " xiubli
@ 2021-08-25 13:45 ` xiubli
  2021-08-25 13:45 ` [PATCH v3 3/3] ceph: don't WARN if we're iterate removing the session caps xiubli
  2021-08-25 17:16 ` [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps Jeff Layton
  3 siblings, 0 replies; 8+ messages in thread
From: xiubli @ 2021-08-25 13:45 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, ukernel, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Force umount will try to close the sessions by setting the session
state to _CLOSING, so in ceph_kill_sb after that it will warn on it.

URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 36ad0ebb2295..8758a27e691c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4566,6 +4566,8 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 
 bool check_session_state(struct ceph_mds_session *s)
 {
+	struct ceph_fs_client *fsc = s->s_mdsc->fsc;
+
 	switch (s->s_state) {
 	case CEPH_MDS_SESSION_OPEN:
 		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
@@ -4574,8 +4576,9 @@ bool check_session_state(struct ceph_mds_session *s)
 		}
 		break;
 	case CEPH_MDS_SESSION_CLOSING:
-		/* Should never reach this when we're unmounting */
-		WARN_ON_ONCE(s->s_ttl);
+		/* Should never reach this when none force unmounting */
+		WARN_ON_ONCE(s->s_ttl &&
+			     READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
 		fallthrough;
 	case CEPH_MDS_SESSION_NEW:
 	case CEPH_MDS_SESSION_RESTARTING:
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 3/3] ceph: don't WARN if we're iterate removing the session caps
  2021-08-25 13:45 [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps xiubli
  2021-08-25 13:45 ` [PATCH v3 1/3] " xiubli
  2021-08-25 13:45 ` [PATCH v3 2/3] ceph: don't WARN if we're force umounting xiubli
@ 2021-08-25 13:45 ` xiubli
  2021-08-25 17:16 ` [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps Jeff Layton
  3 siblings, 0 replies; 8+ messages in thread
From: xiubli @ 2021-08-25 13:45 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, ukernel, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

For example in case force umounting it will remove all the session
caps one by one even it's dirty cap.

URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 39 ++++++++++++++++++++++++++++++---------
 fs/ceph/mds_client.c |  2 +-
 fs/ceph/super.h      |  1 +
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 61326b490b2b..6f20e18023c8 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1114,17 +1114,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 		return;
 	}
 
+	lockdep_assert_held(&ci->i_ceph_lock);
+
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
 
 	mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
 
 	/* remove from inode's cap rbtree, and clear auth cap */
 	rb_erase(&cap->ci_node, &ci->i_caps);
-	if (ci->i_auth_cap == cap) {
-		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
-			     !mdsc->fsc->blocklisted);
+	if (ci->i_auth_cap == cap)
 		ci->i_auth_cap = NULL;
-	}
 
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
@@ -1176,6 +1175,28 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 	}
 }
 
+void ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+{
+	struct ceph_inode_info *ci = cap->ci;
+	struct ceph_fs_client *fsc;
+
+	/* 'ci' being NULL means the remove have already occurred */
+	if (!ci) {
+		dout("%s: cap inode is NULL\n", __func__);
+		return;
+	}
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+	WARN_ON_ONCE(ci->i_auth_cap == cap &&
+		     !list_empty(&ci->i_dirty_item) &&
+		     !fsc->blocklisted &&
+		     READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
+
+	__ceph_remove_cap(cap, queue_release);
+}
+
 struct cap_msg_args {
 	struct ceph_mds_session	*session;
 	u64			ino, cid, follows;
@@ -1304,7 +1325,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
 		p = rb_next(p);
-		__ceph_remove_cap(cap, true);
+		ceph_remove_cap(cap, true);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 }
@@ -3828,7 +3849,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		goto out_unlock;
 
 	if (target < 0) {
-		__ceph_remove_cap(cap, false);
+		ceph_remove_cap(cap, false);
 		goto out_unlock;
 	}
 
@@ -3863,7 +3884,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 				change_auth_cap_ses(ci, tcap->session);
 			}
 		}
-		__ceph_remove_cap(cap, false);
+		ceph_remove_cap(cap, false);
 		goto out_unlock;
 	} else if (tsession) {
 		/* add placeholder for the export tagert */
@@ -3880,7 +3901,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			spin_unlock(&mdsc->cap_dirty_lock);
 		}
 
-		__ceph_remove_cap(cap, false);
+		ceph_remove_cap(cap, false);
 		goto out_unlock;
 	}
 
@@ -3991,7 +4012,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 					ocap->mseq, mds, le32_to_cpu(ph->seq),
 					le32_to_cpu(ph->mseq));
 		}
-		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+		ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
 	}
 
 	*old_issued = issued;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8758a27e691c..3cfd4654d17c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2016,7 +2016,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
-		__ceph_remove_cap(cap, true);
+		ceph_remove_cap(cap, true);
 		(*remaining)--;
 	} else {
 		struct dentry *dentry;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 445d13d760d1..c1add4ed59d2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1144,6 +1144,7 @@ extern void ceph_add_cap(struct inode *inode,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap **new_cap);
 extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
+extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void __ceph_remove_caps(struct ceph_inode_info *ci);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-25 13:45 ` [PATCH v3 1/3] " xiubli
@ 2021-08-25 14:25   ` Jeff Layton
  2021-08-26  0:48     ` Xiubo Li
  0 siblings, 1 reply; 8+ messages in thread
From: Jeff Layton @ 2021-08-25 14:25 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, ukernel, pdonnell, ceph-devel

On Wed, 2021-08-25 at 21:45 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> The capsnaps will ihold the inodes when queuing to flush, so when
> force umounting it will close the sessions first and if the MDSes
> respond very fast and the session connections are closed just
> before killing the superblock, which will flush the msgr queue,
> then the flush capsnap callback won't ever be called, which will
> lead the memory leak bug for the ceph_inode_info.
> 
> URL: https://tracker.ceph.com/issues/52295
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c       | 67 +++++++++++++++++++++++++++++++++-----------
>  fs/ceph/mds_client.c | 31 +++++++++++++++++++-
>  fs/ceph/super.h      |  6 ++++
>  3 files changed, 86 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 1e6261a16fb5..61326b490b2b 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3162,7 +3162,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>  				break;
>  			}
>  		}
> -		BUG_ON(!found);
> +
> +		/*
> +		 * The capsnap should already be removed when
> +		 * removing auth cap in case likes force unmount.
> +		 */
> +		BUG_ON(!found && ci->i_auth_cap);
> +		if (!found)
> +			goto unlock;
> +
>  		capsnap->dirty_pages -= nr;
>  		if (capsnap->dirty_pages == 0) {
>  			complete_capsnap = true;
> @@ -3184,6 +3192,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>  		     complete_capsnap ? " (complete capsnap)" : "");
>  	}
>  
> +unlock:
>  	spin_unlock(&ci->i_ceph_lock);
>  
>  	if (last) {
> @@ -3658,6 +3667,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>  		iput(inode);
>  }
>  
> +void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
> +			   bool *wake_ci, bool *wake_mdsc)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> +	bool ret;
> +
> +	lockdep_assert_held(&ci->i_ceph_lock);

Hmm, your earlier patch had a note saying that the s_mutex needed to he
held here too. Is that not the case?

> +
> +	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
> +
> +	list_del_init(&capsnap->ci_item);
> +	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> +	if (wake_ci)
> +		*wake_ci = ret;
> +
> +	spin_lock(&mdsc->cap_dirty_lock);
> +	if (list_empty(&ci->i_cap_flush_list))
> +		list_del_init(&ci->i_flushing_item);
> +
> +	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
> +	if (wake_mdsc)
> +		*wake_mdsc = ret;
> +	spin_unlock(&mdsc->cap_dirty_lock);
> +}
> +
> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
> +			 bool *wake_ci, bool *wake_mdsc)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +
> +	lockdep_assert_held(&ci->i_ceph_lock);
> +
> +	WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
> +	__ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
> +}
> +
>  /*
>   * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
>   * throw away our cap_snap.
> @@ -3695,23 +3741,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
>  			     capsnap, capsnap->follows);
>  		}
>  	}
> -	if (flushed) {
> -		WARN_ON(capsnap->dirty_pages || capsnap->writing);
> -		dout(" removing %p cap_snap %p follows %lld\n",
> -		     inode, capsnap, follows);
> -		list_del(&capsnap->ci_item);
> -		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> -
> -		spin_lock(&mdsc->cap_dirty_lock);
> -
> -		if (list_empty(&ci->i_cap_flush_list))
> -			list_del_init(&ci->i_flushing_item);
> -
> -		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
> -							  &capsnap->cap_flush);
> -		spin_unlock(&mdsc->cap_dirty_lock);
> -	}
> +	if (flushed)
> +		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
>  	spin_unlock(&ci->i_ceph_lock);
> +
>  	if (flushed) {
>  		ceph_put_snap_context(capsnap->context);
>  		ceph_put_cap_snap(capsnap);
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index df3a735f7837..36ad0ebb2295 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1604,14 +1604,39 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  	return ret;
>  }
>  
> +static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_cap_snap *capsnap;
> +	int capsnap_release = 0;
> +
> +	lockdep_assert_held(&ci->i_ceph_lock);
> +
> +	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
> +
> +	while (!list_empty(&ci->i_cap_snaps)) {
> +		capsnap = list_first_entry(&ci->i_cap_snaps,
> +					   struct ceph_cap_snap, ci_item);
> +		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
> +		ceph_put_snap_context(capsnap->context);
> +		ceph_put_cap_snap(capsnap);
> +		capsnap_release++;
> +	}
> +	wake_up_all(&ci->i_cap_wq);
> +	wake_up_all(&mdsc->cap_flushing_wq);
> +	return capsnap_release;
> +}
> +
>  static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  				  void *arg)
>  {
>  	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
> +	struct ceph_mds_client *mdsc = fsc->mdsc;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	LIST_HEAD(to_remove);
>  	bool dirty_dropped = false;
>  	bool invalidate = false;
> +	int capsnap_release = 0;
>  
>  	dout("removing cap %p, ci is %p, inode is %p\n",
>  	     cap, ci, &ci->vfs_inode);
> @@ -1619,7 +1644,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	__ceph_remove_cap(cap, false);
>  	if (!ci->i_auth_cap) {
>  		struct ceph_cap_flush *cf;
> -		struct ceph_mds_client *mdsc = fsc->mdsc;
>  
>  		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
>  			if (inode->i_data.nrpages > 0)
> @@ -1683,6 +1707,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
>  			ci->i_prealloc_cap_flush = NULL;
>  		}
> +
> +		if (!list_empty(&ci->i_cap_snaps))
> +			capsnap_release = remove_capsnaps(mdsc, inode);
>  	}
>  	spin_unlock(&ci->i_ceph_lock);
>  	while (!list_empty(&to_remove)) {
> @@ -1699,6 +1726,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  		ceph_queue_invalidate(inode);
>  	if (dirty_dropped)
>  		iput(inode);
> +	while (capsnap_release--)
> +		iput(inode);
>  	return 0;
>  }
>  
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 8f4f2747be65..445d13d760d1 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1169,6 +1169,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
>  					    int had);
>  extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>  				       struct ceph_snap_context *snapc);
> +extern void __ceph_remove_capsnap(struct inode *inode,
> +				  struct ceph_cap_snap *capsnap,
> +				  bool *wake_ci, bool *wake_mdsc);
> +extern void ceph_remove_capsnap(struct inode *inode,
> +				struct ceph_cap_snap *capsnap,
> +				bool *wake_ci, bool *wake_mdsc);
>  extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>  			     struct ceph_mds_session **psession);
>  extern bool __ceph_should_report_size(struct ceph_inode_info *ci);

-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps
  2021-08-25 13:45 [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps xiubli
                   ` (2 preceding siblings ...)
  2021-08-25 13:45 ` [PATCH v3 3/3] ceph: don't WARN if we're iterate removing the session caps xiubli
@ 2021-08-25 17:16 ` Jeff Layton
  2021-08-26  1:39   ` Xiubo Li
  3 siblings, 1 reply; 8+ messages in thread
From: Jeff Layton @ 2021-08-25 17:16 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, ukernel, pdonnell, ceph-devel

On Wed, 2021-08-25 at 21:45 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> V3:
> - fix one crash bug in the first patch.
> 
> V2:
> - minor fixes to clean up the code from Jeff's comments, thanks
> - swith to use lockdep_assert_held().
> 
> 
> 
> Test this for around 5 hours and this patch series worked well for me, my test script is:
> 
> $ while [ 1 ]; do date; for d in A B C; do (for i in {1..3}; do ./bin/mount.ceph :/ /mnt/kcephfs.$d -o noshare; rm -rf /mnt/kcephfs.$d/file$i.txt; rmdir /mnt/kcephfs.$d/.snap/snap$i; dd if=/dev/zero of=/mnt/kcephfs.$d/file$i.txt bs=1M count=8; mkdir -p /mnt/kcephfs.$d/.snap/snap$i; umount -fl /mnt/kcephfs.$d; done ) & done; wait; date; done
> 
> 
> 
> Xiubo Li (3):
>   ceph: remove the capsnaps when removing the caps
>   ceph: don't WARN if we're force umounting
>   ceph: don't WARN if we're iterate removing the session caps
> 
>  fs/ceph/caps.c       | 106 ++++++++++++++++++++++++++++++++-----------
>  fs/ceph/mds_client.c |  40 ++++++++++++++--
>  fs/ceph/super.h      |   7 +++
>  3 files changed, 123 insertions(+), 30 deletions(-)
> 

This looks good overall. I made a small change to the first patch to
turn the old BUG_ON into a WARN_ON_ONCE. I didn't see the need to crash
the box in that case.

Also, I revised the changelogs and a couple of comments. Let me know if
you see any issues with the changes I merged into "testing".

Thanks!
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-25 14:25   ` Jeff Layton
@ 2021-08-26  0:48     ` Xiubo Li
  0 siblings, 0 replies; 8+ messages in thread
From: Xiubo Li @ 2021-08-26  0:48 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, ukernel, pdonnell, ceph-devel


On 8/25/21 10:25 PM, Jeff Layton wrote:
> On Wed, 2021-08-25 at 21:45 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> The capsnaps will ihold the inodes when queuing to flush, so when
>> force umounting it will close the sessions first and if the MDSes
>> respond very fast and the session connections are closed just
>> before killing the superblock, which will flush the msgr queue,
>> then the flush capsnap callback won't ever be called, which will
>> lead the memory leak bug for the ceph_inode_info.
>>
>> URL: https://tracker.ceph.com/issues/52295
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/caps.c       | 67 +++++++++++++++++++++++++++++++++-----------
>>   fs/ceph/mds_client.c | 31 +++++++++++++++++++-
>>   fs/ceph/super.h      |  6 ++++
>>   3 files changed, 86 insertions(+), 18 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 1e6261a16fb5..61326b490b2b 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3162,7 +3162,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>>   				break;
>>   			}
>>   		}
>> -		BUG_ON(!found);
>> +
>> +		/*
>> +		 * The capsnap should already be removed when
>> +		 * removing auth cap in case likes force unmount.
>> +		 */
>> +		BUG_ON(!found && ci->i_auth_cap);
>> +		if (!found)
>> +			goto unlock;
>> +
>>   		capsnap->dirty_pages -= nr;
>>   		if (capsnap->dirty_pages == 0) {
>>   			complete_capsnap = true;
>> @@ -3184,6 +3192,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>>   		     complete_capsnap ? " (complete capsnap)" : "");
>>   	}
>>   
>> +unlock:
>>   	spin_unlock(&ci->i_ceph_lock);
>>   
>>   	if (last) {
>> @@ -3658,6 +3667,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>   		iput(inode);
>>   }
>>   
>> +void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
>> +			   bool *wake_ci, bool *wake_mdsc)
>> +{
>> +	struct ceph_inode_info *ci = ceph_inode(inode);
>> +	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>> +	bool ret;
>> +
>> +	lockdep_assert_held(&ci->i_ceph_lock);
> Hmm, your earlier patch had a note saying that the s_mutex needed to he
> held here too. Is that not the case?

The s_mutex is not needed here, I meant the i_ceph_lock and the comment 
was just copied from somewhere and forgot to modify it.



>
>> +
>> +	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
>> +
>> +	list_del_init(&capsnap->ci_item);
>> +	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
>> +	if (wake_ci)
>> +		*wake_ci = ret;
>> +
>> +	spin_lock(&mdsc->cap_dirty_lock);
>> +	if (list_empty(&ci->i_cap_flush_list))
>> +		list_del_init(&ci->i_flushing_item);
>> +
>> +	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
>> +	if (wake_mdsc)
>> +		*wake_mdsc = ret;
>> +	spin_unlock(&mdsc->cap_dirty_lock);
>> +}
>> +
>> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
>> +			 bool *wake_ci, bool *wake_mdsc)
>> +{
>> +	struct ceph_inode_info *ci = ceph_inode(inode);
>> +
>> +	lockdep_assert_held(&ci->i_ceph_lock);
>> +
>> +	WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
>> +	__ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
>> +}
>> +
>>   /*
>>    * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
>>    * throw away our cap_snap.
>> @@ -3695,23 +3741,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
>>   			     capsnap, capsnap->follows);
>>   		}
>>   	}
>> -	if (flushed) {
>> -		WARN_ON(capsnap->dirty_pages || capsnap->writing);
>> -		dout(" removing %p cap_snap %p follows %lld\n",
>> -		     inode, capsnap, follows);
>> -		list_del(&capsnap->ci_item);
>> -		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
>> -
>> -		spin_lock(&mdsc->cap_dirty_lock);
>> -
>> -		if (list_empty(&ci->i_cap_flush_list))
>> -			list_del_init(&ci->i_flushing_item);
>> -
>> -		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
>> -							  &capsnap->cap_flush);
>> -		spin_unlock(&mdsc->cap_dirty_lock);
>> -	}
>> +	if (flushed)
>> +		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
>>   	spin_unlock(&ci->i_ceph_lock);
>> +
>>   	if (flushed) {
>>   		ceph_put_snap_context(capsnap->context);
>>   		ceph_put_cap_snap(capsnap);
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index df3a735f7837..36ad0ebb2295 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1604,14 +1604,39 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>>   	return ret;
>>   }
>>   
>> +static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
>> +{
>> +	struct ceph_inode_info *ci = ceph_inode(inode);
>> +	struct ceph_cap_snap *capsnap;
>> +	int capsnap_release = 0;
>> +
>> +	lockdep_assert_held(&ci->i_ceph_lock);
>> +
>> +	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
>> +
>> +	while (!list_empty(&ci->i_cap_snaps)) {
>> +		capsnap = list_first_entry(&ci->i_cap_snaps,
>> +					   struct ceph_cap_snap, ci_item);
>> +		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
>> +		ceph_put_snap_context(capsnap->context);
>> +		ceph_put_cap_snap(capsnap);
>> +		capsnap_release++;
>> +	}
>> +	wake_up_all(&ci->i_cap_wq);
>> +	wake_up_all(&mdsc->cap_flushing_wq);
>> +	return capsnap_release;
>> +}
>> +
>>   static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   				  void *arg)
>>   {
>>   	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>> +	struct ceph_mds_client *mdsc = fsc->mdsc;
>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>>   	LIST_HEAD(to_remove);
>>   	bool dirty_dropped = false;
>>   	bool invalidate = false;
>> +	int capsnap_release = 0;
>>   
>>   	dout("removing cap %p, ci is %p, inode is %p\n",
>>   	     cap, ci, &ci->vfs_inode);
>> @@ -1619,7 +1644,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   	__ceph_remove_cap(cap, false);
>>   	if (!ci->i_auth_cap) {
>>   		struct ceph_cap_flush *cf;
>> -		struct ceph_mds_client *mdsc = fsc->mdsc;
>>   
>>   		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
>>   			if (inode->i_data.nrpages > 0)
>> @@ -1683,6 +1707,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
>>   			ci->i_prealloc_cap_flush = NULL;
>>   		}
>> +
>> +		if (!list_empty(&ci->i_cap_snaps))
>> +			capsnap_release = remove_capsnaps(mdsc, inode);
>>   	}
>>   	spin_unlock(&ci->i_ceph_lock);
>>   	while (!list_empty(&to_remove)) {
>> @@ -1699,6 +1726,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   		ceph_queue_invalidate(inode);
>>   	if (dirty_dropped)
>>   		iput(inode);
>> +	while (capsnap_release--)
>> +		iput(inode);
>>   	return 0;
>>   }
>>   
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 8f4f2747be65..445d13d760d1 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -1169,6 +1169,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
>>   					    int had);
>>   extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>>   				       struct ceph_snap_context *snapc);
>> +extern void __ceph_remove_capsnap(struct inode *inode,
>> +				  struct ceph_cap_snap *capsnap,
>> +				  bool *wake_ci, bool *wake_mdsc);
>> +extern void ceph_remove_capsnap(struct inode *inode,
>> +				struct ceph_cap_snap *capsnap,
>> +				bool *wake_ci, bool *wake_mdsc);
>>   extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>>   			     struct ceph_mds_session **psession);
>>   extern bool __ceph_should_report_size(struct ceph_inode_info *ci);


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps
  2021-08-25 17:16 ` [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps Jeff Layton
@ 2021-08-26  1:39   ` Xiubo Li
  0 siblings, 0 replies; 8+ messages in thread
From: Xiubo Li @ 2021-08-26  1:39 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, ukernel, pdonnell, ceph-devel


On 8/26/21 1:16 AM, Jeff Layton wrote:
> On Wed, 2021-08-25 at 21:45 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> V3:
>> - fix one crash bug in the first patch.
>>
>> V2:
>> - minor fixes to clean up the code from Jeff's comments, thanks
>> - swith to use lockdep_assert_held().
>>
>>
>>
>> Test this for around 5 hours and this patch series worked well for me, my test script is:
>>
>> $ while [ 1 ]; do date; for d in A B C; do (for i in {1..3}; do ./bin/mount.ceph :/ /mnt/kcephfs.$d -o noshare; rm -rf /mnt/kcephfs.$d/file$i.txt; rmdir /mnt/kcephfs.$d/.snap/snap$i; dd if=/dev/zero of=/mnt/kcephfs.$d/file$i.txt bs=1M count=8; mkdir -p /mnt/kcephfs.$d/.snap/snap$i; umount -fl /mnt/kcephfs.$d; done ) & done; wait; date; done
>>
>>
>>
>> Xiubo Li (3):
>>    ceph: remove the capsnaps when removing the caps
>>    ceph: don't WARN if we're force umounting
>>    ceph: don't WARN if we're iterate removing the session caps
>>
>>   fs/ceph/caps.c       | 106 ++++++++++++++++++++++++++++++++-----------
>>   fs/ceph/mds_client.c |  40 ++++++++++++++--
>>   fs/ceph/super.h      |   7 +++
>>   3 files changed, 123 insertions(+), 30 deletions(-)
>>
> This looks good overall. I made a small change to the first patch to
> turn the old BUG_ON into a WARN_ON_ONCE. I didn't see the need to crash
> the box in that case.

That looks good to me.


>
> Also, I revised the changelogs and a couple of comments. Let me know if
> you see any issues with the changes I merged into "testing".

This LGTM too.

Thanks Jeff.


>
> Thanks!


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-08-26  1:39 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-25 13:45 [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps xiubli
2021-08-25 13:45 ` [PATCH v3 1/3] " xiubli
2021-08-25 14:25   ` Jeff Layton
2021-08-26  0:48     ` Xiubo Li
2021-08-25 13:45 ` [PATCH v3 2/3] ceph: don't WARN if we're force umounting xiubli
2021-08-25 13:45 ` [PATCH v3 3/3] ceph: don't WARN if we're iterate removing the session caps xiubli
2021-08-25 17:16 ` [PATCH v3 0/3] ceph: remove the capsnaps when removing the caps Jeff Layton
2021-08-26  1:39   ` Xiubo Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).