ceph-devel.vger.kernel.org archive mirror
From: Xiubo Li <xiubli@redhat.com>
To: Jeff Layton <jlayton@kernel.org>
Cc: idryomov@gmail.com, pdonnell@redhat.com, ceph-devel@vger.kernel.org
Subject: Re: [PATCH v4] ceph: correctly release memory from capsnap
Date: Thu, 19 Aug 2021 20:40:10 +0800
Message-ID: <b79d3b90-49e8-8244-d22b-dc25a97e2ea5@redhat.com>
In-Reply-To: <e290f32319721d894b1d44c4be37b5b617a18780.camel@kernel.org>


On 8/19/21 6:59 PM, Jeff Layton wrote:
> On Thu, 2021-08-19 at 07:43 +0800, Xiubo Li wrote:
>> On 8/19/21 12:06 AM, Jeff Layton wrote:
>>> On Wed, 2021-08-18 at 21:38 +0800, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> When force umounting, the client will try to remove all the
>>>> session caps. If any capsnap is still on the flushing list, the
>>>> remove-session-caps callback will try to release the
>>>> capsnap->cap_flush memory back to the "ceph_cap_flush_cachep"
>>>> slab cache, but that memory is embedded in the capsnap and was
>>>> actually allocated from the kmalloc-256 slab cache.
>>>>
>>>> At the same time, switch to list_del_init(): in case the force
>>>> umount has already removed the entry from the lists and a late
>>>> handle_cap_flushsnap_ack() then arrives, the second
>>>> list_del_init() will be a harmless no-op instead of crashing
>>>> the kernel.
>>>>
>>>> URL: https://tracker.ceph.com/issues/52283
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>
>>>> Changed in V4:
>>>> - add a new is_capsnap field in ceph_cap_flush struct.
>>>>
>>>>
>>>>    fs/ceph/caps.c       | 19 ++++++++++++-------
>>>>    fs/ceph/mds_client.c |  7 ++++---
>>>>    fs/ceph/snap.c       |  1 +
>>>>    fs/ceph/super.h      |  3 ++-
>>>>    4 files changed, 19 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>> index 4663ab830614..52c7026fd0d1 100644
>>>> --- a/fs/ceph/caps.c
>>>> +++ b/fs/ceph/caps.c
>>>> @@ -1712,7 +1712,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>>>>    
>>>>    struct ceph_cap_flush *ceph_alloc_cap_flush(void)
>>>>    {
>>>> -	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>>>> +	struct ceph_cap_flush *cf;
>>>> +
>>>> +	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>>>> +	cf->is_capsnap = false;
>>>> +	return cf;
>>>>    }
>>>>    
>>>>    void ceph_free_cap_flush(struct ceph_cap_flush *cf)
>>>> @@ -1747,7 +1751,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
>>>>    		prev->wake = true;
>>>>    		wake = false;
>>>>    	}
>>>> -	list_del(&cf->g_list);
>>>> +	list_del_init(&cf->g_list);
>>>>    	return wake;
>>>>    }
>>>>    
>>>> @@ -1762,7 +1766,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
>>>>    		prev->wake = true;
>>>>    		wake = false;
>>>>    	}
>>>> -	list_del(&cf->i_list);
>>>> +	list_del_init(&cf->i_list);
>>>>    	return wake;
>>>>    }
>>>>    
>>>> @@ -2400,7 +2404,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>>>>    	ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
>>>>    
>>>>    	list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
>>>> -		if (!cf->caps) {
>>>> +		if (cf->is_capsnap) {
>>>>    			last_snap_flush = cf->tid;
>>>>    			break;
>>>>    		}
>>>> @@ -2419,7 +2423,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>>>>    
>>>>    		first_tid = cf->tid + 1;
>>>>    
>>>> -		if (cf->caps) {
>>>> +		if (!cf->is_capsnap) {
>>>>    			struct cap_msg_args arg;
>>>>    
>>>>    			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
>>>> @@ -3568,7 +3572,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>>>    			cleaned = cf->caps;
>>>>    
>>>>    		/* Is this a capsnap? */
>>>> -		if (cf->caps == 0)
>>>> +		if (cf->is_capsnap)
>>>>    			continue;
>>>>    
>>>>    		if (cf->tid <= flush_tid) {
>>>> @@ -3642,7 +3646,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>>>    		cf = list_first_entry(&to_remove,
>>>>    				      struct ceph_cap_flush, i_list);
>>>>    		list_del(&cf->i_list);
>>>> -		ceph_free_cap_flush(cf);
>>>> +		if (!cf->is_capsnap)
>>>> +			ceph_free_cap_flush(cf);
>>>>    	}
>>>>    
>>>>    	if (wake_ci)
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index dcb5f34a084b..b9e6a69cc058 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -1656,7 +1656,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>>    		spin_lock(&mdsc->cap_dirty_lock);
>>>>    
>>>>    		list_for_each_entry(cf, &to_remove, i_list)
>>>> -			list_del(&cf->g_list);
>>>> +			list_del_init(&cf->g_list);
>>>>    
>>>>    		if (!list_empty(&ci->i_dirty_item)) {
>>>>    			pr_warn_ratelimited(
>>>> @@ -1710,8 +1710,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>>    		struct ceph_cap_flush *cf;
>>>>    		cf = list_first_entry(&to_remove,
>>>>    				      struct ceph_cap_flush, i_list);
>>>> -		list_del(&cf->i_list);
>>>> -		ceph_free_cap_flush(cf);
>>>> +		list_del_init(&cf->i_list);
>>>> +		if (!cf->is_capsnap)
>>>> +			ceph_free_cap_flush(cf);
>>>>    	}
>>>>    
>>>>    	wake_up_all(&ci->i_cap_wq);
>>>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>>>> index af502a8245f0..62fab59bbf96 100644
>>>> --- a/fs/ceph/snap.c
>>>> +++ b/fs/ceph/snap.c
>>>> @@ -487,6 +487,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>>>>    		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
>>>>    		return;
>>>>    	}
>>>> +	capsnap->cap_flush.is_capsnap = true;
>>>>    
>>>>    	spin_lock(&ci->i_ceph_lock);
>>>>    	used = __ceph_caps_used(ci);
>>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>>>> index 106ddfd1ce92..336350861791 100644
>>>> --- a/fs/ceph/super.h
>>>> +++ b/fs/ceph/super.h
>>>> @@ -186,8 +186,9 @@ struct ceph_cap {
>>>>    
>>>>    struct ceph_cap_flush {
>>>>    	u64 tid;
>>>> -	int caps; /* 0 means capsnap */
>>>> +	int caps;
>>>>    	bool wake; /* wake up flush waiters when finish ? */
>>>> +	bool is_capsnap; /* true means capsnap */
>>>>    	struct list_head g_list; // global
>>>>    	struct list_head i_list; // per inode
>>>>    	struct ceph_inode_info *ci;
>>> Looks good, Xiubo. I'll merge into testing after a bit of local testing
>>> on my part.
>>>
>>> I'll plan to mark this one for stable too, but I'll need to look at the
>>> prerequisites as there may be merge conflicts with earlier kernels.
>> I tried it, though not against all of them. For some old kernels the
>> patch conflicts only in the code around
>> `__detach_cap_flush_from_mdsc()` and `__detach_cap_flush_from_ci()`,
>> because those kernels still have the two funcs folded into a single
>> __finish_cap_flush().
>>
> Yeah, there are quite a few merge conflicts on older kernels and pulling
> in earlier patches to eliminate them just brings in more conflicts. We
> may have to do a custom backport on this one for some of the older
> stable kernels.
>
Yeah, agreed.
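
A side note for anyone reading this in the archive, on why the old free
was wrong: a regular cap flush is its own slab object, while a capsnap's
cap_flush is embedded in the much larger ceph_cap_snap. Below is a
minimal sketch of that relationship, simplified from fs/ceph/super.h and
the patch above -- not the exact kernel code, and most fields are
omitted:

/* Simplified sketch; see fs/ceph/super.h for the real definitions. */
struct ceph_cap_flush {
	u64 tid;
	int caps;
	bool wake;
	bool is_capsnap;		/* true: embedded in a ceph_cap_snap */
	struct list_head g_list;	/* global */
	struct list_head i_list;	/* per inode */
};

struct ceph_cap_snap {
	/* ... many other fields ... */
	struct ceph_cap_flush cap_flush; /* embedded, NOT a slab object */
	/* ... */
};

static void release_cap_flush(struct ceph_cap_flush *cf)
{
	list_del_init(&cf->i_list);
	/*
	 * Only objects that really came from ceph_cap_flush_cachep may
	 * be returned to it.  Passing &capsnap->cap_flush (kmalloc-256
	 * memory) to kmem_cache_free(ceph_cap_flush_cachep, ...) would
	 * corrupt the slab caches, which is what the old force-umount
	 * path did.
	 */
	if (!cf->is_capsnap)
		ceph_free_cap_flush(cf);
}

The embedded copy is freed later, when the ceph_cap_snap itself is
released as a whole.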
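
On the list_del() -> list_del_init() part specifically: list_del()
poisons the entry's next/prev pointers, so removing the same entry a
second time dereferences LIST_POISON1/LIST_POISON2 and oopses, while
list_del_init() re-initializes the entry to point at itself, making a
second removal a no-op. An illustrative, non-ceph-specific sketch:

#include <linux/list.h>

static void unsafe_double_detach(struct list_head *item)
{
	list_del(item);		/* item->next/prev now hold poison values */
	list_del(item);		/* BUG: dereferences LIST_POISON1/2 */
}

static void safe_double_detach(struct list_head *item)
{
	list_del_init(item);	/* unlinks and re-inits item to itself */
	list_del_init(item);	/* harmless no-op on an empty entry */
}

This is why the force-umount path and a later
handle_cap_flushsnap_ack() can both detach the same entry without
crashing.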



Thread overview: 5+ messages

2021-08-18 13:38 [PATCH v4] ceph: correctly release memory from capsnap xiubli
2021-08-18 16:06 ` Jeff Layton
2021-08-18 23:43   ` Xiubo Li
2021-08-19 10:59     ` Jeff Layton
2021-08-19 12:40       ` Xiubo Li [this message]
