From: Xiubo Li <xiubli@redhat.com>
To: jlayton@kernel.org
Cc: idryomov@gmail.com, ukernel@gmail.com, pdonnell@redhat.com,
	ceph-devel@vger.kernel.org
Subject: Re: [PATCH v2 1/3] ceph: remove the capsnaps when removing the caps
Date: Wed, 25 Aug 2021 14:32:26 +0800	[thread overview]
Message-ID: <3932b0b5-50cc-3d1e-c508-80ee655a2c38@redhat.com>
In-Reply-To: <20210825051355.5820-2-xiubli@redhat.com>

There is still one bug; I am looking into it.

Thanks


On 8/25/21 1:13 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> The capsnaps take a reference on the inode (ihold) when they are
> queued for flushing. On a forced umount the sessions are closed
> first, and if the MDSes respond quickly the session connections may
> be closed, flushing the msgr queue, just before the superblock is
> killed. In that case the flush capsnap callback is never called,
> which leaks the ceph_inode_info.
>
> URL: https://tracker.ceph.com/issues/52295
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   fs/ceph/caps.c       | 56 +++++++++++++++++++++++++++++++-------------
>   fs/ceph/mds_client.c | 25 +++++++++++++++++++-
>   fs/ceph/super.h      |  6 +++++
>   3 files changed, 70 insertions(+), 17 deletions(-)
>
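
To make the leak concrete, here is a pseudo-code sketch of the
reference pairing described above (simplified, not the literal call
sites):

	/* queueing a capsnap for flush pins the inode ... */
	ihold(inode);

	/* ... and the pin is normally dropped once the FLUSHSNAP
	 * ack comes back from the MDS */
	iput(inode);	/* in handle_cap_flushsnap_ack() */

If the session is torn down before the ack is delivered, that iput()
never runs and the ceph_inode_info leaks. The change below drops any
remaining capsnaps, and the inode references they hold, when the caps
themselves are removed.
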
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index ddd86106e6d0..557c610289fb 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3658,6 +3658,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>   		iput(inode);
>   }
>   
> +void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
> +			   bool *wake_ci, bool *wake_mdsc)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> +	bool ret;
> +
> +	lockdep_assert_held(&ci->i_ceph_lock);
> +
> +	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
> +
> +	list_del_init(&capsnap->ci_item);
> +	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> +	if (wake_ci)
> +		*wake_ci = ret;
> +
> +	spin_lock(&mdsc->cap_dirty_lock);
> +	if (list_empty(&ci->i_cap_flush_list))
> +		list_del_init(&ci->i_flushing_item);
> +
> +	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
> +	if (wake_mdsc)
> +		*wake_mdsc = ret;
> +	spin_unlock(&mdsc->cap_dirty_lock);
> +}
> +
> +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
> +			 bool *wake_ci, bool *wake_mdsc)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +
> +	lockdep_assert_held(&ci->i_ceph_lock);
> +
> +	WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
> +	__ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
> +}
> +
>   /*
>    * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
>    * throw away our cap_snap.
> @@ -3695,23 +3732,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
>   			     capsnap, capsnap->follows);
>   		}
>   	}
> -	if (flushed) {
> -		WARN_ON(capsnap->dirty_pages || capsnap->writing);
> -		dout(" removing %p cap_snap %p follows %lld\n",
> -		     inode, capsnap, follows);
> -		list_del(&capsnap->ci_item);
> -		wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
> -
> -		spin_lock(&mdsc->cap_dirty_lock);
> -
> -		if (list_empty(&ci->i_cap_flush_list))
> -			list_del_init(&ci->i_flushing_item);
> -
> -		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
> -							  &capsnap->cap_flush);
> -		spin_unlock(&mdsc->cap_dirty_lock);
> -	}
> +	if (flushed)
> +		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
>   	spin_unlock(&ci->i_ceph_lock);
> +
>   	if (flushed) {
>   		ceph_put_snap_context(capsnap->context);
>   		ceph_put_cap_snap(capsnap);
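
For anyone comparing the before/after closely: the open-coded block
moves into the new helpers almost verbatim, with two small changes,
list_del() becoming list_del_init() and WARN_ON() becoming
WARN_ON_ONCE(). The resulting pattern in the ack handler is roughly
(abbreviated from the hunk above):

	spin_lock(&ci->i_ceph_lock);
	...
	if (flushed)
		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
	spin_unlock(&ci->i_ceph_lock);

	if (flushed) {
		/* the final puts happen outside the lock */
		ceph_put_snap_context(capsnap->context);
		ceph_put_cap_snap(capsnap);
		...
	}
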
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index df3a735f7837..df10f9b33660 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1604,10 +1604,32 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>   	return ret;
>   }
>   
> +static void remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
> +{
> +	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_cap_snap *capsnap;
> +
> +	lockdep_assert_held(&ci->i_ceph_lock);
> +
> +	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
> +
> +	while (!list_empty(&ci->i_cap_snaps)) {
> +		capsnap = list_first_entry(&ci->i_cap_snaps,
> +					   struct ceph_cap_snap, ci_item);
> +		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
> +		ceph_put_snap_context(capsnap->context);
> +		ceph_put_cap_snap(capsnap);
> +		iput(inode);
> +	}
> +	wake_up_all(&ci->i_cap_wq);
> +	wake_up_all(&mdsc->cap_flushing_wq);
> +}
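
A design note on the teardown loop, for clarity: the helper is called
with NULL for wake_ci/wake_mdsc, so no per-capsnap wakeup bookkeeping
is done; instead, both wait queues are woken once after the list has
been drained:

	wake_up_all(&ci->i_cap_wq);
	wake_up_all(&mdsc->cap_flushing_wq);

Each iteration drops the snap context reference, the capsnap
reference, and the inode reference taken when the capsnap was queued,
matching the cleanup that the FLUSHSNAP ack path would have done.
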
> +
>   static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>   				  void *arg)
>   {
>   	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
> +	struct ceph_mds_client *mdsc = fsc->mdsc;
>   	struct ceph_inode_info *ci = ceph_inode(inode);
>   	LIST_HEAD(to_remove);
>   	bool dirty_dropped = false;
> @@ -1619,7 +1641,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>   	__ceph_remove_cap(cap, false);
>   	if (!ci->i_auth_cap) {
>   		struct ceph_cap_flush *cf;
> -		struct ceph_mds_client *mdsc = fsc->mdsc;
>   
>   		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
>   			if (inode->i_data.nrpages > 0)
> @@ -1684,6 +1705,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>   			ci->i_prealloc_cap_flush = NULL;
>   		}
>   	}
> +	if (!list_empty(&ci->i_cap_snaps))
> +		remove_capsnaps(mdsc, inode);
>   	spin_unlock(&ci->i_ceph_lock);
>   	while (!list_empty(&to_remove)) {
>   		struct ceph_cap_flush *cf;
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 8f4f2747be65..445d13d760d1 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1169,6 +1169,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
>   					    int had);
>   extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>   				       struct ceph_snap_context *snapc);
> +extern void __ceph_remove_capsnap(struct inode *inode,
> +				  struct ceph_cap_snap *capsnap,
> +				  bool *wake_ci, bool *wake_mdsc);
> +extern void ceph_remove_capsnap(struct inode *inode,
> +				struct ceph_cap_snap *capsnap,
> +				bool *wake_ci, bool *wake_mdsc);
>   extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>   			     struct ceph_mds_session **psession);
>   extern bool __ceph_should_report_size(struct ceph_inode_info *ci);


Thread overview: 5+ messages
2021-08-25  5:13 [PATCH v2 0/3] ceph: remove the capsnaps when removing the caps xiubli
2021-08-25  5:13 ` [PATCH v2 1/3] " xiubli
2021-08-25  6:32   ` Xiubo Li [this message]
2021-08-25  5:13 ` [PATCH v2 2/3] ceph: don't WARN if we're force umounting xiubli
2021-08-25  5:13 ` [PATCH v2 3/3] ceph: don't WARN if we're iterate removing the session caps xiubli
