ceph-devel.vger.kernel.org archive mirror
* [PATCH 0/3] ceph: remove the capsnaps when removing the caps
@ 2021-08-18  8:06 xiubli
  2021-08-18  8:06 ` [PATCH 1/3] " xiubli
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: xiubli @ 2021-08-18  8:06 UTC
  To: jlayton; +Cc: idryomov, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Fix a memory leak of the ceph_inode_info and silence two spurious warnings.


Xiubo Li (3):
  ceph: remove the capsnaps when removing the caps
  ceph: don't WARN if we're force umounting
  ceph: don't WARN if we're iteratively removing the session caps

 fs/ceph/caps.c       | 62 ++++++++++++++++++++++++++++----------------
 fs/ceph/mds_client.c | 36 +++++++++++++++++++++----
 fs/ceph/super.h      |  6 ++++-
 3 files changed, 75 insertions(+), 29 deletions(-)

-- 
2.27.0



* [PATCH 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-18  8:06 [PATCH 0/3] ceph: remove the capsnaps when removing the caps xiubli
@ 2021-08-18  8:06 ` xiubli
  2021-08-23 13:47   ` Jeff Layton
  2021-08-23 14:58   ` Jeff Layton
  2021-08-18  8:06 ` [PATCH 2/3] ceph: don't WARN if we're force umounting xiubli
  2021-08-18  8:06 ` [PATCH 3/3] ceph: don't WARN if we're iteratively removing the session caps xiubli
  2 siblings, 2 replies; 12+ messages in thread
From: xiubli @ 2021-08-18  8:06 UTC
  To: jlayton; +Cc: idryomov, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

The capsnaps will ihold the inodes when queued to flush. When force
umounting, the sessions are closed first, and if the MDSes respond
very fast, the session connections may be closed (flushing the msgr
queue) just before the superblock is killed. The flush capsnap
callback is then never called, leaking the ceph_inode_info.

URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 47 +++++++++++++++++++++++++++++---------------
 fs/ceph/mds_client.c | 23 +++++++++++++++++++++-
 fs/ceph/super.h      |  3 +++
 3 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index e239f06babbc..7def99fbdca6 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3663,6 +3663,34 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 		iput(inode);
 }
 
+/*
+ * Caller holds s_mutex and i_ceph_lock.
+ */
+void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+			 bool *wake_ci, bool *wake_mdsc)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	bool ret;
+
+	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
+
+	WARN_ON(capsnap->dirty_pages || capsnap->writing);
+	list_del(&capsnap->ci_item);
+	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
+	if (wake_ci)
+		*wake_ci = ret;
+
+	spin_lock(&mdsc->cap_dirty_lock);
+	if (list_empty(&ci->i_cap_flush_list))
+		list_del_init(&ci->i_flushing_item);
+
+	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
+	if (wake_mdsc)
+		*wake_mdsc = ret;
+	spin_unlock(&mdsc->cap_dirty_lock);
+}
+
 /*
  * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
  * throw away our cap_snap.
@@ -3700,23 +3728,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 			     capsnap, capsnap->follows);
 		}
 	}
-	if (flushed) {
-		WARN_ON(capsnap->dirty_pages || capsnap->writing);
-		dout(" removing %p cap_snap %p follows %lld\n",
-		     inode, capsnap, follows);
-		list_del(&capsnap->ci_item);
-		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
-
-		spin_lock(&mdsc->cap_dirty_lock);
-
-		if (list_empty(&ci->i_cap_flush_list))
-			list_del_init(&ci->i_flushing_item);
-
-		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
-							  &capsnap->cap_flush);
-		spin_unlock(&mdsc->cap_dirty_lock);
-	}
+	if (flushed)
+		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
 	spin_unlock(&ci->i_ceph_lock);
+
 	if (flushed) {
 		ceph_put_snap_context(capsnap->context);
 		ceph_put_cap_snap(capsnap);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fa4c0fe294c1..a632e1c7cef2 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1604,10 +1604,30 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
 	return ret;
 }
 
+static void remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_cap_snap *capsnap;
+
+	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
+
+	while (!list_empty(&ci->i_cap_snaps)) {
+		capsnap = list_first_entry(&ci->i_cap_snaps,
+					   struct ceph_cap_snap, ci_item);
+		ceph_remove_capsnap(inode, capsnap, NULL, NULL);
+		ceph_put_snap_context(capsnap->context);
+		ceph_put_cap_snap(capsnap);
+		iput(inode);
+	}
+	wake_up_all(&ci->i_cap_wq);
+	wake_up_all(&mdsc->cap_flushing_wq);
+}
+
 static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 				  void *arg)
 {
 	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	LIST_HEAD(to_remove);
 	bool dirty_dropped = false;
@@ -1619,7 +1639,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	__ceph_remove_cap(cap, false);
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
-		struct ceph_mds_client *mdsc = fsc->mdsc;
 
 		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 			if (inode->i_data.nrpages > 0)
@@ -1684,6 +1703,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 			ci->i_prealloc_cap_flush = NULL;
 		}
 	}
+	if (!list_empty(&ci->i_cap_snaps))
+		remove_capsnaps(mdsc, inode);
 	spin_unlock(&ci->i_ceph_lock);
 	while (!list_empty(&to_remove)) {
 		struct ceph_cap_flush *cf;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 0bc36cf4c683..51ec17d12b26 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1168,6 +1168,9 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
 					    int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
+extern void ceph_remove_capsnap(struct inode *inode,
+				struct ceph_cap_snap *capsnap,
+				bool *wake_ci, bool *wake_mdsc);
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
 			     struct ceph_mds_session **psession);
 extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
-- 
2.27.0



* [PATCH 2/3] ceph: don't WARN if we're force umounting
  2021-08-18  8:06 [PATCH 0/3] ceph: remove the capsnaps when removing the caps xiubli
  2021-08-18  8:06 ` [PATCH 1/3] " xiubli
@ 2021-08-18  8:06 ` xiubli
  2021-08-23 13:49   ` Jeff Layton
  2021-08-18  8:06 ` [PATCH 3/3] ceph: don't WARN if we're iteratively removing the session caps xiubli
  2 siblings, 1 reply; 12+ messages in thread
From: xiubli @ 2021-08-18  8:06 UTC
  To: jlayton; +Cc: idryomov, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Force umount will try to close the sessions by setting the session
state to _CLOSING, so ceph_kill_sb() will subsequently WARN on that
state.

URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a632e1c7cef2..0302af53e079 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4558,6 +4558,8 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 
 bool check_session_state(struct ceph_mds_session *s)
 {
+	struct ceph_fs_client *fsc = s->s_mdsc->fsc;
+
 	switch (s->s_state) {
 	case CEPH_MDS_SESSION_OPEN:
 		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
@@ -4566,8 +4568,11 @@ bool check_session_state(struct ceph_mds_session *s)
 		}
 		break;
 	case CEPH_MDS_SESSION_CLOSING:
-		/* Should never reach this when we're unmounting */
-		WARN_ON_ONCE(s->s_ttl);
+		/*
+		 * Should never reach this when not force unmounting
+		 */
+		if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN)
+			WARN_ON_ONCE(s->s_ttl);
 		fallthrough;
 	case CEPH_MDS_SESSION_NEW:
 	case CEPH_MDS_SESSION_RESTARTING:
-- 
2.27.0



* [PATCH 3/3] ceph: don't WARN if we're iteratively removing the session caps
  2021-08-18  8:06 [PATCH 0/3] ceph: remove the capsnaps when removing the caps xiubli
  2021-08-18  8:06 ` [PATCH 1/3] " xiubli
  2021-08-18  8:06 ` [PATCH 2/3] ceph: don't WARN if we're force umounting xiubli
@ 2021-08-18  8:06 ` xiubli
  2021-08-23 13:59   ` Jeff Layton
  2 siblings, 1 reply; 12+ messages in thread
From: xiubli @ 2021-08-18  8:06 UTC
  To: jlayton; +Cc: idryomov, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

For example, when force umounting, all the session caps will be
removed one by one, even if a cap is still dirty.

URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 15 ++++++++-------
 fs/ceph/mds_client.c |  4 ++--
 fs/ceph/super.h      |  3 ++-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7def99fbdca6..1ed9b9d57dd3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1101,7 +1101,7 @@ int ceph_is_any_caps(struct inode *inode)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release, bool warn)
 {
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
@@ -1121,7 +1121,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 	/* remove from inode's cap rbtree, and clear auth cap */
 	rb_erase(&cap->ci_node, &ci->i_caps);
 	if (ci->i_auth_cap == cap) {
-		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
+		WARN_ON_ONCE(warn && !list_empty(&ci->i_dirty_item) &&
 			     !mdsc->fsc->blocklisted);
 		ci->i_auth_cap = NULL;
 	}
@@ -1304,7 +1304,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
 		p = rb_next(p);
-		__ceph_remove_cap(cap, true);
+		__ceph_remove_cap(cap, true, true);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 }
@@ -3815,7 +3815,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		goto out_unlock;
 
 	if (target < 0) {
-		__ceph_remove_cap(cap, false);
+		__ceph_remove_cap(cap, false, true);
 		goto out_unlock;
 	}
 
@@ -3850,7 +3850,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 				change_auth_cap_ses(ci, tcap->session);
 			}
 		}
-		__ceph_remove_cap(cap, false);
+		__ceph_remove_cap(cap, false, true);
 		goto out_unlock;
 	} else if (tsession) {
  		/* add placeholder for the export target */
@@ -3867,7 +3867,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			spin_unlock(&mdsc->cap_dirty_lock);
 		}
 
-		__ceph_remove_cap(cap, false);
+		__ceph_remove_cap(cap, false, true);
 		goto out_unlock;
 	}
 
@@ -3978,7 +3978,8 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 					ocap->mseq, mds, le32_to_cpu(ph->seq),
 					le32_to_cpu(ph->mseq));
 		}
-		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE),
+				  true);
 	}
 
 	*old_issued = issued;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0302af53e079..d99ec2618585 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	__ceph_remove_cap(cap, false);
+	__ceph_remove_cap(cap, false, false);
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
 
@@ -2008,7 +2008,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
-		__ceph_remove_cap(cap, true);
+		__ceph_remove_cap(cap, true, true);
 		(*remaining)--;
 	} else {
 		struct dentry *dentry;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 51ec17d12b26..106ddfd1ce92 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1142,7 +1142,8 @@ extern void ceph_add_cap(struct inode *inode,
 			 unsigned issued, unsigned wanted,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap **new_cap);
-extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release,
+			      bool warn);
 extern void __ceph_remove_caps(struct ceph_inode_info *ci);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
-- 
2.27.0



* Re: [PATCH 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-18  8:06 ` [PATCH 1/3] " xiubli
@ 2021-08-23 13:47   ` Jeff Layton
  2021-08-24  1:04     ` Xiubo Li
  2021-08-23 14:58   ` Jeff Layton
  1 sibling, 1 reply; 12+ messages in thread
From: Jeff Layton @ 2021-08-23 13:47 UTC
  To: xiubli; +Cc: idryomov, pdonnell, ceph-devel

On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> The capsnaps will ihold the inodes when queued to flush. When force
> umounting, the sessions are closed first, and if the MDSes respond
> very fast, the session connections may be closed (flushing the msgr
> queue) just before the superblock is killed. The flush capsnap
> callback is then never called, leaking the ceph_inode_info.
> 
> URL: https://tracker.ceph.com/issues/52295
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c       | 47 +++++++++++++++++++++++++++++---------------
>  fs/ceph/mds_client.c | 23 +++++++++++++++++++++-
>  fs/ceph/super.h      |  3 +++
>  3 files changed, 56 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index e239f06babbc..7def99fbdca6 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3663,6 +3663,34 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>  		iput(inode);
>  }
>  
> +/*
> + * Caller holds s_mutex and i_ceph_lock.
> + */

Why add comments like this when we have lockdep_assert_held()? It's
generally better to use that, as it documents the same locking
relationship and also helps catch callers who violate the rules.
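
For instance, something like this at the top of the function (untested;
it only covers the i_ceph_lock half, since the session isn't passed in
here):

	lockdep_assert_held(&ci->i_ceph_lock);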

> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
> +			 bool *wake_ci, bool *wake_mdsc)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> +	bool ret;
> +
> +	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
> +
> +	WARN_ON(capsnap->dirty_pages || capsnap->writing);
> +	list_del(&capsnap->ci_item);
> +	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> +	if (wake_ci)
> +		*wake_ci = ret;
> +
> +	spin_lock(&mdsc->cap_dirty_lock);
> +	if (list_empty(&ci->i_cap_flush_list))
> +		list_del_init(&ci->i_flushing_item);
> +
> +	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
> +	if (wake_mdsc)
> +		*wake_mdsc = ret;
> +	spin_unlock(&mdsc->cap_dirty_lock);
> +}
> +
>  /*
>   * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
>   * throw away our cap_snap.
> @@ -3700,23 +3728,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
>  			     capsnap, capsnap->follows);
>  		}
>  	}
> -	if (flushed) {
> -		WARN_ON(capsnap->dirty_pages || capsnap->writing);
> -		dout(" removing %p cap_snap %p follows %lld\n",
> -		     inode, capsnap, follows);
> -		list_del(&capsnap->ci_item);
> -		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> -
> -		spin_lock(&mdsc->cap_dirty_lock);
> -
> -		if (list_empty(&ci->i_cap_flush_list))
> -			list_del_init(&ci->i_flushing_item);
> -
> -		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
> -							  &capsnap->cap_flush);
> -		spin_unlock(&mdsc->cap_dirty_lock);
> -	}
> +	if (flushed)
> +		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
>  	spin_unlock(&ci->i_ceph_lock);
> +
>  	if (flushed) {
>  		ceph_put_snap_context(capsnap->context);
>  		ceph_put_cap_snap(capsnap);
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index fa4c0fe294c1..a632e1c7cef2 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1604,10 +1604,30 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  	return ret;
>  }
>  
> +static void remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_cap_snap *capsnap;
> +
> +	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
> +
> +	while (!list_empty(&ci->i_cap_snaps)) {
> +		capsnap = list_first_entry(&ci->i_cap_snaps,
> +					   struct ceph_cap_snap, ci_item);
> +		ceph_remove_capsnap(inode, capsnap, NULL, NULL);
> +		ceph_put_snap_context(capsnap->context);
> +		ceph_put_cap_snap(capsnap);
> +		iput(inode);
> +	}
> +	wake_up_all(&ci->i_cap_wq);
> +	wake_up_all(&mdsc->cap_flushing_wq);
> +}
> +
>  static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  				  void *arg)
>  {
>  	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
> +	struct ceph_mds_client *mdsc = fsc->mdsc;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	LIST_HEAD(to_remove);
>  	bool dirty_dropped = false;
> @@ -1619,7 +1639,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	__ceph_remove_cap(cap, false);
>  	if (!ci->i_auth_cap) {
>  		struct ceph_cap_flush *cf;
> -		struct ceph_mds_client *mdsc = fsc->mdsc;
>  
>  		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
>  			if (inode->i_data.nrpages > 0)
> @@ -1684,6 +1703,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  			ci->i_prealloc_cap_flush = NULL;
>  		}
>  	}
> +	if (!list_empty(&ci->i_cap_snaps))
> +		remove_capsnaps(mdsc, inode);
>  	spin_unlock(&ci->i_ceph_lock);
>  	while (!list_empty(&to_remove)) {
>  		struct ceph_cap_flush *cf;
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 0bc36cf4c683..51ec17d12b26 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1168,6 +1168,9 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
>  					    int had);
>  extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>  				       struct ceph_snap_context *snapc);
> +extern void ceph_remove_capsnap(struct inode *inode,
> +				struct ceph_cap_snap *capsnap,
> +				bool *wake_ci, bool *wake_mdsc);
>  extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>  			     struct ceph_mds_session **psession);
>  extern bool __ceph_should_report_size(struct ceph_inode_info *ci);

Patch looks reasonable otherwise.
-- 
Jeff Layton <jlayton@kernel.org>



* Re: [PATCH 2/3] ceph: don't WARN if we're force umounting
  2021-08-18  8:06 ` [PATCH 2/3] ceph: don't WARN if we're force umounting xiubli
@ 2021-08-23 13:49   ` Jeff Layton
  2021-08-24  1:07     ` Xiubo Li
  0 siblings, 1 reply; 12+ messages in thread
From: Jeff Layton @ 2021-08-23 13:49 UTC
  To: xiubli; +Cc: idryomov, pdonnell, ceph-devel

On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Force umount will try to close the sessions by setting the session
> state to _CLOSING, so ceph_kill_sb() will subsequently WARN on that
> state.
> 
> URL: https://tracker.ceph.com/issues/52295
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index a632e1c7cef2..0302af53e079 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -4558,6 +4558,8 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
>  
>  bool check_session_state(struct ceph_mds_session *s)
>  {
> +	struct ceph_fs_client *fsc = s->s_mdsc->fsc;
> +
>  	switch (s->s_state) {
>  	case CEPH_MDS_SESSION_OPEN:
>  		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
> @@ -4566,8 +4568,11 @@ bool check_session_state(struct ceph_mds_session *s)
>  		}
>  		break;
>  	case CEPH_MDS_SESSION_CLOSING:
> -		/* Should never reach this when we're unmounting */
> -		WARN_ON_ONCE(s->s_ttl);
> +		/*
> +		 * Should never reach this when not force unmounting
> +		 */
> +		if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN)
> +			WARN_ON_ONCE(s->s_ttl);

How about something like this instead?

    WARN_ON_ONCE(s->s_ttl && READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);


>  		fallthrough;
>  	case CEPH_MDS_SESSION_NEW:
>  	case CEPH_MDS_SESSION_RESTARTING:

-- 
Jeff Layton <jlayton@kernel.org>



* Re: [PATCH 3/3] ceph: don't WARN if we're iteratively removing the session caps
  2021-08-18  8:06 ` [PATCH 3/3] ceph: don't WARN if we're iteratively removing the session caps xiubli
@ 2021-08-23 13:59   ` Jeff Layton
  2021-08-24  1:43     ` Xiubo Li
  0 siblings, 1 reply; 12+ messages in thread
From: Jeff Layton @ 2021-08-23 13:59 UTC
  To: xiubli; +Cc: idryomov, pdonnell, ceph-devel

On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> For example, when force umounting, all the session caps will be
> removed one by one, even if a cap is still dirty.
> 
> URL: https://tracker.ceph.com/issues/52295
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c       | 15 ++++++++-------
>  fs/ceph/mds_client.c |  4 ++--
>  fs/ceph/super.h      |  3 ++-
>  3 files changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 7def99fbdca6..1ed9b9d57dd3 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1101,7 +1101,7 @@ int ceph_is_any_caps(struct inode *inode)
>   * caller should hold i_ceph_lock.
>   * caller will not hold session s_mutex if called from destroy_inode.
>   */
> -void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
> +void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release, bool warn)

I think it'd be better to refactor __ceph_remove_cap so that it has a
wrapper function that does the WARN_ON_ONCE call under the right
conditions.

Maybe add a ceph_remove_cap() that does:

	WARN_ON_ONCE(ci && ci->i_auth_cap == cap &&
		     !list_empty(&ci->i_dirty_item) &&
		     !mdsc->fsc->blocklisted);

...and then calls __ceph_remove_cap(). Then you could have the ones that
set "warn" to false call __ceph_remove_cap() and the others would call
ceph_remove_cap(). That's at least a little less ugly (and more
efficient).
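
A rough sketch of what I mean (untested; grabbing the mdsc via the
cap's session is just one way to get at it):

	/* like __ceph_remove_cap(), caller holds i_ceph_lock */
	void ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
	{
		struct ceph_inode_info *ci = cap->ci;
		struct ceph_mds_client *mdsc = cap->session->s_mdsc;

		WARN_ON_ONCE(ci && ci->i_auth_cap == cap &&
			     !list_empty(&ci->i_dirty_item) &&
			     !mdsc->fsc->blocklisted);
		__ceph_remove_cap(cap, queue_release);
	}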

Alternately, I guess you could try to test what state the session is in
and only warn if it's not being force-unmounted?

>  {
>  	struct ceph_mds_session *session = cap->session;
>  	struct ceph_inode_info *ci = cap->ci;
> @@ -1121,7 +1121,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>  	/* remove from inode's cap rbtree, and clear auth cap */
>  	rb_erase(&cap->ci_node, &ci->i_caps);
>  	if (ci->i_auth_cap == cap) {
> -		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
> +		WARN_ON_ONCE(warn && !list_empty(&ci->i_dirty_item) &&
>  			     !mdsc->fsc->blocklisted);
>  		ci->i_auth_cap = NULL;
>  	}
> @@ -1304,7 +1304,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
>  	while (p) {
>  		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
>  		p = rb_next(p);
> -		__ceph_remove_cap(cap, true);
> +		__ceph_remove_cap(cap, true, true);
>  	}
>  	spin_unlock(&ci->i_ceph_lock);
>  }
> @@ -3815,7 +3815,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>  		goto out_unlock;
>  
>  	if (target < 0) {
> -		__ceph_remove_cap(cap, false);
> +		__ceph_remove_cap(cap, false, true);
>  		goto out_unlock;
>  	}
>  
> @@ -3850,7 +3850,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>  				change_auth_cap_ses(ci, tcap->session);
>  			}
>  		}
> -		__ceph_remove_cap(cap, false);
> +		__ceph_remove_cap(cap, false, true);
>  		goto out_unlock;
>  	} else if (tsession) {
>  		/* add placeholder for the export target */
> @@ -3867,7 +3867,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>  			spin_unlock(&mdsc->cap_dirty_lock);
>  		}
>  
> -		__ceph_remove_cap(cap, false);
> +		__ceph_remove_cap(cap, false, true);
>  		goto out_unlock;
>  	}
>  
> @@ -3978,7 +3978,8 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
>  					ocap->mseq, mds, le32_to_cpu(ph->seq),
>  					le32_to_cpu(ph->mseq));
>  		}
> -		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
> +		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE),
> +				  true);
>  	}
>  
>  	*old_issued = issued;
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 0302af53e079..d99ec2618585 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	dout("removing cap %p, ci is %p, inode is %p\n",
>  	     cap, ci, &ci->vfs_inode);
>  	spin_lock(&ci->i_ceph_lock);
> -	__ceph_remove_cap(cap, false);
> +	__ceph_remove_cap(cap, false, false);
>  	if (!ci->i_auth_cap) {
>  		struct ceph_cap_flush *cf;
>  
> @@ -2008,7 +2008,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>  
>  	if (oissued) {
>  		/* we aren't the only cap.. just remove us */
> -		__ceph_remove_cap(cap, true);
> +		__ceph_remove_cap(cap, true, true);
>  		(*remaining)--;
>  	} else {
>  		struct dentry *dentry;
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 51ec17d12b26..106ddfd1ce92 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1142,7 +1142,8 @@ extern void ceph_add_cap(struct inode *inode,
>  			 unsigned issued, unsigned wanted,
>  			 unsigned cap, unsigned seq, u64 realmino, int flags,
>  			 struct ceph_cap **new_cap);
> -extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
> +extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release,
> +			      bool warn);
>  extern void __ceph_remove_caps(struct ceph_inode_info *ci);
>  extern void ceph_put_cap(struct ceph_mds_client *mdsc,
>  			 struct ceph_cap *cap);

-- 
Jeff Layton <jlayton@kernel.org>



* Re: [PATCH 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-18  8:06 ` [PATCH 1/3] " xiubli
  2021-08-23 13:47   ` Jeff Layton
@ 2021-08-23 14:58   ` Jeff Layton
  2021-08-24  1:05     ` Xiubo Li
  1 sibling, 1 reply; 12+ messages in thread
From: Jeff Layton @ 2021-08-23 14:58 UTC
  To: xiubli; +Cc: idryomov, pdonnell, ceph-devel

On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> The capsnaps will ihold the inodes when queued to flush. When force
> umounting, the sessions are closed first, and if the MDSes respond
> very fast, the session connections may be closed (flushing the msgr
> queue) just before the superblock is killed. The flush capsnap
> callback is then never called, leaking the ceph_inode_info.
> 
> URL: https://tracker.ceph.com/issues/52295
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c       | 47 +++++++++++++++++++++++++++++---------------
>  fs/ceph/mds_client.c | 23 +++++++++++++++++++++-
>  fs/ceph/super.h      |  3 +++
>  3 files changed, 56 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index e239f06babbc..7def99fbdca6 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3663,6 +3663,34 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>  		iput(inode);
>  }
>  
> +/*
> + * Caller holds s_mutex and i_ceph_lock.
> + */
> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
> +			 bool *wake_ci, bool *wake_mdsc)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> +	bool ret;
> +
> +	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
> +
> +	WARN_ON(capsnap->dirty_pages || capsnap->writing);

Can we make this a WARN_ON_ONCE too?
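
IOW, just:

	WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);

...so repeated hits can't flood the log.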

> +	list_del(&capsnap->ci_item);
> +	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> +	if (wake_ci)
> +		*wake_ci = ret;
> +
> +	spin_lock(&mdsc->cap_dirty_lock);
> +	if (list_empty(&ci->i_cap_flush_list))
> +		list_del_init(&ci->i_flushing_item);
> +
> +	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
> +	if (wake_mdsc)
> +		*wake_mdsc = ret;
> +	spin_unlock(&mdsc->cap_dirty_lock);
> +}
> +
>  /*
>   * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
>   * throw away our cap_snap.
> @@ -3700,23 +3728,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
>  			     capsnap, capsnap->follows);
>  		}
>  	}
> -	if (flushed) {
> -		WARN_ON(capsnap->dirty_pages || capsnap->writing);
> -		dout(" removing %p cap_snap %p follows %lld\n",
> -		     inode, capsnap, follows);
> -		list_del(&capsnap->ci_item);
> -		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> -
> -		spin_lock(&mdsc->cap_dirty_lock);
> -
> -		if (list_empty(&ci->i_cap_flush_list))
> -			list_del_init(&ci->i_flushing_item);
> -
> -		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
> -							  &capsnap->cap_flush);
> -		spin_unlock(&mdsc->cap_dirty_lock);
> -	}
> +	if (flushed)
> +		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
>  	spin_unlock(&ci->i_ceph_lock);
> +
>  	if (flushed) {
>  		ceph_put_snap_context(capsnap->context);
>  		ceph_put_cap_snap(capsnap);
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index fa4c0fe294c1..a632e1c7cef2 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1604,10 +1604,30 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  	return ret;
>  }
>  
> +static void remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_cap_snap *capsnap;
> +
> +	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
> +
> +	while (!list_empty(&ci->i_cap_snaps)) {
> +		capsnap = list_first_entry(&ci->i_cap_snaps,
> +					   struct ceph_cap_snap, ci_item);
> +		ceph_remove_capsnap(inode, capsnap, NULL, NULL);
> +		ceph_put_snap_context(capsnap->context);
> +		ceph_put_cap_snap(capsnap);
> +		iput(inode);
> +	}
> +	wake_up_all(&ci->i_cap_wq);
> +	wake_up_all(&mdsc->cap_flushing_wq);
> +}
> +
>  static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  				  void *arg)
>  {
>  	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
> +	struct ceph_mds_client *mdsc = fsc->mdsc;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	LIST_HEAD(to_remove);
>  	bool dirty_dropped = false;
> @@ -1619,7 +1639,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	__ceph_remove_cap(cap, false);
>  	if (!ci->i_auth_cap) {
>  		struct ceph_cap_flush *cf;
> -		struct ceph_mds_client *mdsc = fsc->mdsc;
>  
>  		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
>  			if (inode->i_data.nrpages > 0)
> @@ -1684,6 +1703,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  			ci->i_prealloc_cap_flush = NULL;
>  		}
>  	}
> +	if (!list_empty(&ci->i_cap_snaps))
> +		remove_capsnaps(mdsc, inode);
>  	spin_unlock(&ci->i_ceph_lock);
>  	while (!list_empty(&to_remove)) {
>  		struct ceph_cap_flush *cf;
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 0bc36cf4c683..51ec17d12b26 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1168,6 +1168,9 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
>  					    int had);
>  extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>  				       struct ceph_snap_context *snapc);
> +extern void ceph_remove_capsnap(struct inode *inode,
> +				struct ceph_cap_snap *capsnap,
> +				bool *wake_ci, bool *wake_mdsc);
>  extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>  			     struct ceph_mds_session **psession);
>  extern bool __ceph_should_report_size(struct ceph_inode_info *ci);

-- 
Jeff Layton <jlayton@kernel.org>



* Re: [PATCH 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-23 13:47   ` Jeff Layton
@ 2021-08-24  1:04     ` Xiubo Li
  0 siblings, 0 replies; 12+ messages in thread
From: Xiubo Li @ 2021-08-24  1:04 UTC
  To: Jeff Layton; +Cc: idryomov, pdonnell, ceph-devel


On 8/23/21 9:47 PM, Jeff Layton wrote:
> On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> The capsnaps will ihold the inodes when queued to flush. When force
>> umounting, the sessions are closed first, and if the MDSes respond
>> very fast, the session connections may be closed (flushing the msgr
>> queue) just before the superblock is killed. The flush capsnap
>> callback is then never called, leaking the ceph_inode_info.
>>
>> URL: https://tracker.ceph.com/issues/52295
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/caps.c       | 47 +++++++++++++++++++++++++++++---------------
>>   fs/ceph/mds_client.c | 23 +++++++++++++++++++++-
>>   fs/ceph/super.h      |  3 +++
>>   3 files changed, 56 insertions(+), 17 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index e239f06babbc..7def99fbdca6 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3663,6 +3663,34 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>   		iput(inode);
>>   }
>>   
>> +/*
>> + * Caller holds s_mutex and i_ceph_lock.
>> + */
> Why add comments like this when we have lockdep_assert_held()? It's
> generally better to use that, as it documents the same locking
> relationship and also helps catch callers who violate the rules.
>
Okay, I will switch to lockdep_assert_held().

Thanks

>> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
>> +			 bool *wake_ci, bool *wake_mdsc)
>> +{
>> +	struct ceph_inode_info *ci = ceph_inode(inode);
>> +	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>> +	bool ret;
>> +
>> +	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
>> +
>> +	WARN_ON(capsnap->dirty_pages || capsnap->writing);
>> +	list_del(&capsnap->ci_item);
>> +	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
>> +	if (wake_ci)
>> +		*wake_ci = ret;
>> +
>> +	spin_lock(&mdsc->cap_dirty_lock);
>> +	if (list_empty(&ci->i_cap_flush_list))
>> +		list_del_init(&ci->i_flushing_item);
>> +
>> +	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
>> +	if (wake_mdsc)
>> +		*wake_mdsc = ret;
>> +	spin_unlock(&mdsc->cap_dirty_lock);
>> +}
>> +
>>   /*
>>    * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
>>    * throw away our cap_snap.
>> @@ -3700,23 +3728,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
>>   			     capsnap, capsnap->follows);
>>   		}
>>   	}
>> -	if (flushed) {
>> -		WARN_ON(capsnap->dirty_pages || capsnap->writing);
>> -		dout(" removing %p cap_snap %p follows %lld\n",
>> -		     inode, capsnap, follows);
>> -		list_del(&capsnap->ci_item);
>> -		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
>> -
>> -		spin_lock(&mdsc->cap_dirty_lock);
>> -
>> -		if (list_empty(&ci->i_cap_flush_list))
>> -			list_del_init(&ci->i_flushing_item);
>> -
>> -		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
>> -							  &capsnap->cap_flush);
>> -		spin_unlock(&mdsc->cap_dirty_lock);
>> -	}
>> +	if (flushed)
>> +		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
>>   	spin_unlock(&ci->i_ceph_lock);
>> +
>>   	if (flushed) {
>>   		ceph_put_snap_context(capsnap->context);
>>   		ceph_put_cap_snap(capsnap);
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index fa4c0fe294c1..a632e1c7cef2 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1604,10 +1604,30 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>>   	return ret;
>>   }
>>   
>> +static void remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
>> +{
>> +	struct ceph_inode_info *ci = ceph_inode(inode);
>> +	struct ceph_cap_snap *capsnap;
>> +
>> +	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
>> +
>> +	while (!list_empty(&ci->i_cap_snaps)) {
>> +		capsnap = list_first_entry(&ci->i_cap_snaps,
>> +					   struct ceph_cap_snap, ci_item);
>> +		ceph_remove_capsnap(inode, capsnap, NULL, NULL);
>> +		ceph_put_snap_context(capsnap->context);
>> +		ceph_put_cap_snap(capsnap);
>> +		iput(inode);
>> +	}
>> +	wake_up_all(&ci->i_cap_wq);
>> +	wake_up_all(&mdsc->cap_flushing_wq);
>> +}
>> +
>>   static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   				  void *arg)
>>   {
>>   	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>> +	struct ceph_mds_client *mdsc = fsc->mdsc;
>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>>   	LIST_HEAD(to_remove);
>>   	bool dirty_dropped = false;
>> @@ -1619,7 +1639,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   	__ceph_remove_cap(cap, false);
>>   	if (!ci->i_auth_cap) {
>>   		struct ceph_cap_flush *cf;
>> -		struct ceph_mds_client *mdsc = fsc->mdsc;
>>   
>>   		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
>>   			if (inode->i_data.nrpages > 0)
>> @@ -1684,6 +1703,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   			ci->i_prealloc_cap_flush = NULL;
>>   		}
>>   	}
>> +	if (!list_empty(&ci->i_cap_snaps))
>> +		remove_capsnaps(mdsc, inode);
>>   	spin_unlock(&ci->i_ceph_lock);
>>   	while (!list_empty(&to_remove)) {
>>   		struct ceph_cap_flush *cf;
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 0bc36cf4c683..51ec17d12b26 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -1168,6 +1168,9 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
>>   					    int had);
>>   extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>>   				       struct ceph_snap_context *snapc);
>> +extern void ceph_remove_capsnap(struct inode *inode,
>> +				struct ceph_cap_snap *capsnap,
>> +				bool *wake_ci, bool *wake_mdsc);
>>   extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>>   			     struct ceph_mds_session **psession);
>>   extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
> Patch looks reasonable otherwise.



* Re: [PATCH 1/3] ceph: remove the capsnaps when removing the caps
  2021-08-23 14:58   ` Jeff Layton
@ 2021-08-24  1:05     ` Xiubo Li
  0 siblings, 0 replies; 12+ messages in thread
From: Xiubo Li @ 2021-08-24  1:05 UTC
  To: Jeff Layton; +Cc: idryomov, pdonnell, ceph-devel


On 8/23/21 10:58 PM, Jeff Layton wrote:
> On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> The capsnaps will ihold the inodes when queued to flush. When force
>> umounting, the sessions are closed first, and if the MDSes respond
>> very fast, the session connections may be closed (flushing the msgr
>> queue) just before the superblock is killed. The flush capsnap
>> callback is then never called, leaking the ceph_inode_info.
>>
>> URL: https://tracker.ceph.com/issues/52295
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/caps.c       | 47 +++++++++++++++++++++++++++++---------------
>>   fs/ceph/mds_client.c | 23 +++++++++++++++++++++-
>>   fs/ceph/super.h      |  3 +++
>>   3 files changed, 56 insertions(+), 17 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index e239f06babbc..7def99fbdca6 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3663,6 +3663,34 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>   		iput(inode);
>>   }
>>   
>> +/*
>> + * Caller holds s_mutex and i_ceph_lock.
>> + */
>> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
>> +			 bool *wake_ci, bool *wake_mdsc)
>> +{
>> +	struct ceph_inode_info *ci = ceph_inode(inode);
>> +	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>> +	bool ret;
>> +
>> +	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
>> +
>> +	WARN_ON(capsnap->dirty_pages || capsnap->writing);
> Can we make this a WARN_ON_ONCE too?

Yeah, will fix it.

Thanks



* Re: [PATCH 2/3] ceph: don't WARN if we're force umounting
  2021-08-23 13:49   ` Jeff Layton
@ 2021-08-24  1:07     ` Xiubo Li
  0 siblings, 0 replies; 12+ messages in thread
From: Xiubo Li @ 2021-08-24  1:07 UTC
  To: Jeff Layton; +Cc: idryomov, pdonnell, ceph-devel


On 8/23/21 9:49 PM, Jeff Layton wrote:
> On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Force umount will try to close the sessions by setting the session
>> state to _CLOSING, so ceph_kill_sb() will subsequently WARN on that
>> state.
>>
>> URL: https://tracker.ceph.com/issues/52295
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c | 9 +++++++--
>>   1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index a632e1c7cef2..0302af53e079 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -4558,6 +4558,8 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
>>   
>>   bool check_session_state(struct ceph_mds_session *s)
>>   {
>> +	struct ceph_fs_client *fsc = s->s_mdsc->fsc;
>> +
>>   	switch (s->s_state) {
>>   	case CEPH_MDS_SESSION_OPEN:
>>   		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
>> @@ -4566,8 +4568,11 @@ bool check_session_state(struct ceph_mds_session *s)
>>   		}
>>   		break;
>>   	case CEPH_MDS_SESSION_CLOSING:
>> -		/* Should never reach this when we're unmounting */
>> -		WARN_ON_ONCE(s->s_ttl);
>> +		/*
>> +		 * Should never reach this when not force unmounting
>> +		 */
>> +		if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN)
>> +			WARN_ON_ONCE(s->s_ttl);
> How about something like this instead?
>
>      WARN_ON_ONCE(s->s_ttl && READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);


This looks good to me too. Will fix it.

Thanks


>
>>   		fallthrough;
>>   	case CEPH_MDS_SESSION_NEW:
>>   	case CEPH_MDS_SESSION_RESTARTING:



* Re: [PATCH 3/3] ceph: don't WARN if we're iteratively removing the session caps
  2021-08-23 13:59   ` Jeff Layton
@ 2021-08-24  1:43     ` Xiubo Li
  0 siblings, 0 replies; 12+ messages in thread
From: Xiubo Li @ 2021-08-24  1:43 UTC
  To: Jeff Layton; +Cc: idryomov, pdonnell, ceph-devel


On 8/23/21 9:59 PM, Jeff Layton wrote:
> On Wed, 2021-08-18 at 16:06 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> For example, when force umounting, all the session caps will be
>> removed one by one, even if a cap is still dirty.
>>
>> URL: https://tracker.ceph.com/issues/52295
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/caps.c       | 15 ++++++++-------
>>   fs/ceph/mds_client.c |  4 ++--
>>   fs/ceph/super.h      |  3 ++-
>>   3 files changed, 12 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 7def99fbdca6..1ed9b9d57dd3 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -1101,7 +1101,7 @@ int ceph_is_any_caps(struct inode *inode)
>>    * caller should hold i_ceph_lock.
>>    * caller will not hold session s_mutex if called from destroy_inode.
>>    */
>> -void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>> +void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release, bool warn)
> I think it'd be better to refactor __ceph_remove_cap so that it has a
> wrapper function that does the WARN_ON_ONCE call under the right
> conditions.
>
> Maybe add a ceph_remove_cap() that does:
>
> 	WARN_ON_ONCE(ci && ci->i_auth_cap == cap &&
> 		     !list_empty(&ci->i_dirty_item) &&
> 		     !mdsc->fsc->blocklisted);
>
> ...and then calls __ceph_remove_cap(). Then you could have the ones that
> set "warn" to false call __ceph_remove_cap() and the others would call
> ceph_remove_cap(). That's at least a little less ugly (and more
> efficient).

Sure, makes sense.

> Alternately, I guess you could try to test what state the session is in
> and only warn if it's not being force-unmounted?

I will try this later.
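
Maybe checking the mount state instead of the session state would be
simpler, something like this in __ceph_remove_cap(), reusing the
mount_state check from the second patch (just a rough idea, untested):

	if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_SHUTDOWN)
		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
			     !mdsc->fsc->blocklisted);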

Thanks


>>   {
>>   	struct ceph_mds_session *session = cap->session;
>>   	struct ceph_inode_info *ci = cap->ci;
>> @@ -1121,7 +1121,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>>   	/* remove from inode's cap rbtree, and clear auth cap */
>>   	rb_erase(&cap->ci_node, &ci->i_caps);
>>   	if (ci->i_auth_cap == cap) {
>> -		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
>> +		WARN_ON_ONCE(warn && !list_empty(&ci->i_dirty_item) &&
>>   			     !mdsc->fsc->blocklisted);
>>   		ci->i_auth_cap = NULL;
>>   	}
>> @@ -1304,7 +1304,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
>>   	while (p) {
>>   		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
>>   		p = rb_next(p);
>> -		__ceph_remove_cap(cap, true);
>> +		__ceph_remove_cap(cap, true, true);
>>   	}
>>   	spin_unlock(&ci->i_ceph_lock);
>>   }
>> @@ -3815,7 +3815,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>>   		goto out_unlock;
>>   
>>   	if (target < 0) {
>> -		__ceph_remove_cap(cap, false);
>> +		__ceph_remove_cap(cap, false, true);
>>   		goto out_unlock;
>>   	}
>>   
>> @@ -3850,7 +3850,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>>   				change_auth_cap_ses(ci, tcap->session);
>>   			}
>>   		}
>> -		__ceph_remove_cap(cap, false);
>> +		__ceph_remove_cap(cap, false, true);
>>   		goto out_unlock;
>>   	} else if (tsession) {
>>   		/* add placeholder for the export target */
>> @@ -3867,7 +3867,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>>   			spin_unlock(&mdsc->cap_dirty_lock);
>>   		}
>>   
>> -		__ceph_remove_cap(cap, false);
>> +		__ceph_remove_cap(cap, false, true);
>>   		goto out_unlock;
>>   	}
>>   
>> @@ -3978,7 +3978,8 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
>>   					ocap->mseq, mds, le32_to_cpu(ph->seq),
>>   					le32_to_cpu(ph->mseq));
>>   		}
>> -		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
>> +		__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE),
>> +				  true);
>>   	}
>>   
>>   	*old_issued = issued;
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 0302af53e079..d99ec2618585 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   	dout("removing cap %p, ci is %p, inode is %p\n",
>>   	     cap, ci, &ci->vfs_inode);
>>   	spin_lock(&ci->i_ceph_lock);
>> -	__ceph_remove_cap(cap, false);
>> +	__ceph_remove_cap(cap, false, false);
>>   	if (!ci->i_auth_cap) {
>>   		struct ceph_cap_flush *cf;
>>   
>> @@ -2008,7 +2008,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>>   
>>   	if (oissued) {
>>   		/* we aren't the only cap.. just remove us */
>> -		__ceph_remove_cap(cap, true);
>> +		__ceph_remove_cap(cap, true, true);
>>   		(*remaining)--;
>>   	} else {
>>   		struct dentry *dentry;
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 51ec17d12b26..106ddfd1ce92 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -1142,7 +1142,8 @@ extern void ceph_add_cap(struct inode *inode,
>>   			 unsigned issued, unsigned wanted,
>>   			 unsigned cap, unsigned seq, u64 realmino, int flags,
>>   			 struct ceph_cap **new_cap);
>> -extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
>> +extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release,
>> +			      bool warn);
>>   extern void __ceph_remove_caps(struct ceph_inode_info *ci);
>>   extern void ceph_put_cap(struct ceph_mds_client *mdsc,
>>   			 struct ceph_cap *cap);



end of thread

Thread overview: 12+ messages
2021-08-18  8:06 [PATCH 0/3] ceph: remove the capsnaps when removing the caps xiubli
2021-08-18  8:06 ` [PATCH 1/3] " xiubli
2021-08-23 13:47   ` Jeff Layton
2021-08-24  1:04     ` Xiubo Li
2021-08-23 14:58   ` Jeff Layton
2021-08-24  1:05     ` Xiubo Li
2021-08-18  8:06 ` [PATCH 2/3] ceph: don't WARN if we're force umounting xiubli
2021-08-23 13:49   ` Jeff Layton
2021-08-24  1:07     ` Xiubo Li
2021-08-18  8:06 ` [PATCH 3/3] ceph: don't WARN if we're iteratively removing the session caps xiubli
2021-08-23 13:59   ` Jeff Layton
2021-08-24  1:43     ` Xiubo Li
