ceph-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4] ceph: correctly release memory from capsnap
@ 2021-08-18 13:38 xiubli
  2021-08-18 16:06 ` Jeff Layton
  0 siblings, 1 reply; 5+ messages in thread
From: xiubli @ 2021-08-18 13:38 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

When force umounting, it will try to remove all the session caps.
If there has any capsnap is in the flushing list, the remove session
caps callback will try to release the capsnap->flush_cap memory to
"ceph_cap_flush_cachep" slab cache, while which is allocated from
kmalloc-256 slab cache.

At the same time switch to list_del_init() because just in case the
force umount has removed it from the lists and the
handle_cap_flushsnap_ack() comes then the seconds list_del_init()
won't crash the kernel.

URL: https://tracker.ceph.com/issues/52283
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

Changed in V4:
- add a new is_capsnap field in ceph_cap_flush struct.


 fs/ceph/caps.c       | 19 ++++++++++++-------
 fs/ceph/mds_client.c |  7 ++++---
 fs/ceph/snap.c       |  1 +
 fs/ceph/super.h      |  3 ++-
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 4663ab830614..52c7026fd0d1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1712,7 +1712,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
 
 struct ceph_cap_flush *ceph_alloc_cap_flush(void)
 {
-	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+	struct ceph_cap_flush *cf;
+
+	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+	cf->is_capsnap = false;
+	return cf;
 }
 
 void ceph_free_cap_flush(struct ceph_cap_flush *cf)
@@ -1747,7 +1751,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
 		prev->wake = true;
 		wake = false;
 	}
-	list_del(&cf->g_list);
+	list_del_init(&cf->g_list);
 	return wake;
 }
 
@@ -1762,7 +1766,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
 		prev->wake = true;
 		wake = false;
 	}
-	list_del(&cf->i_list);
+	list_del_init(&cf->i_list);
 	return wake;
 }
 
@@ -2400,7 +2404,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 	ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
 
 	list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
-		if (!cf->caps) {
+		if (cf->is_capsnap) {
 			last_snap_flush = cf->tid;
 			break;
 		}
@@ -2419,7 +2423,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 
 		first_tid = cf->tid + 1;
 
-		if (cf->caps) {
+		if (!cf->is_capsnap) {
 			struct cap_msg_args arg;
 
 			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
@@ -3568,7 +3572,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 			cleaned = cf->caps;
 
 		/* Is this a capsnap? */
-		if (cf->caps == 0)
+		if (cf->is_capsnap)
 			continue;
 
 		if (cf->tid <= flush_tid) {
@@ -3642,7 +3646,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 		cf = list_first_entry(&to_remove,
 				      struct ceph_cap_flush, i_list);
 		list_del(&cf->i_list);
-		ceph_free_cap_flush(cf);
+		if (!cf->is_capsnap)
+			ceph_free_cap_flush(cf);
 	}
 
 	if (wake_ci)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index dcb5f34a084b..b9e6a69cc058 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1656,7 +1656,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		spin_lock(&mdsc->cap_dirty_lock);
 
 		list_for_each_entry(cf, &to_remove, i_list)
-			list_del(&cf->g_list);
+			list_del_init(&cf->g_list);
 
 		if (!list_empty(&ci->i_dirty_item)) {
 			pr_warn_ratelimited(
@@ -1710,8 +1710,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		struct ceph_cap_flush *cf;
 		cf = list_first_entry(&to_remove,
 				      struct ceph_cap_flush, i_list);
-		list_del(&cf->i_list);
-		ceph_free_cap_flush(cf);
+		list_del_init(&cf->i_list);
+		if (!cf->is_capsnap)
+			ceph_free_cap_flush(cf);
 	}
 
 	wake_up_all(&ci->i_cap_wq);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index af502a8245f0..62fab59bbf96 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -487,6 +487,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
 		return;
 	}
+	capsnap->cap_flush.is_capsnap = true;
 
 	spin_lock(&ci->i_ceph_lock);
 	used = __ceph_caps_used(ci);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 106ddfd1ce92..336350861791 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -186,8 +186,9 @@ struct ceph_cap {
 
 struct ceph_cap_flush {
 	u64 tid;
-	int caps; /* 0 means capsnap */
+	int caps;
 	bool wake; /* wake up flush waiters when finish ? */
+	bool is_capsnap; /* true means capsnap */
 	struct list_head g_list; // global
 	struct list_head i_list; // per inode
 	struct ceph_inode_info *ci;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v4] ceph: correctly release memory from capsnap
  2021-08-18 13:38 [PATCH v4] ceph: correctly release memory from capsnap xiubli
@ 2021-08-18 16:06 ` Jeff Layton
  2021-08-18 23:43   ` Xiubo Li
  0 siblings, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2021-08-18 16:06 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, pdonnell, ceph-devel

On Wed, 2021-08-18 at 21:38 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> When force umounting, it will try to remove all the session caps.
> If there has any capsnap is in the flushing list, the remove session
> caps callback will try to release the capsnap->flush_cap memory to
> "ceph_cap_flush_cachep" slab cache, while which is allocated from
> kmalloc-256 slab cache.
> 
> At the same time switch to list_del_init() because just in case the
> force umount has removed it from the lists and the
> handle_cap_flushsnap_ack() comes then the seconds list_del_init()
> won't crash the kernel.
> 
> URL: https://tracker.ceph.com/issues/52283
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> 
> Changed in V4:
> - add a new is_capsnap field in ceph_cap_flush struct.
> 
> 
>  fs/ceph/caps.c       | 19 ++++++++++++-------
>  fs/ceph/mds_client.c |  7 ++++---
>  fs/ceph/snap.c       |  1 +
>  fs/ceph/super.h      |  3 ++-
>  4 files changed, 19 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 4663ab830614..52c7026fd0d1 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1712,7 +1712,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>  
>  struct ceph_cap_flush *ceph_alloc_cap_flush(void)
>  {
> -	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> +	struct ceph_cap_flush *cf;
> +
> +	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> +	cf->is_capsnap = false;
> +	return cf;
>  }
>  
>  void ceph_free_cap_flush(struct ceph_cap_flush *cf)
> @@ -1747,7 +1751,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
>  		prev->wake = true;
>  		wake = false;
>  	}
> -	list_del(&cf->g_list);
> +	list_del_init(&cf->g_list);
>  	return wake;
>  }
>  
> @@ -1762,7 +1766,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
>  		prev->wake = true;
>  		wake = false;
>  	}
> -	list_del(&cf->i_list);
> +	list_del_init(&cf->i_list);
>  	return wake;
>  }
>  
> @@ -2400,7 +2404,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>  	ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
>  
>  	list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
> -		if (!cf->caps) {
> +		if (cf->is_capsnap) {
>  			last_snap_flush = cf->tid;
>  			break;
>  		}
> @@ -2419,7 +2423,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>  
>  		first_tid = cf->tid + 1;
>  
> -		if (cf->caps) {
> +		if (!cf->is_capsnap) {
>  			struct cap_msg_args arg;
>  
>  			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
> @@ -3568,7 +3572,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>  			cleaned = cf->caps;
>  
>  		/* Is this a capsnap? */
> -		if (cf->caps == 0)
> +		if (cf->is_capsnap)
>  			continue;
>  
>  		if (cf->tid <= flush_tid) {
> @@ -3642,7 +3646,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>  		cf = list_first_entry(&to_remove,
>  				      struct ceph_cap_flush, i_list);
>  		list_del(&cf->i_list);
> -		ceph_free_cap_flush(cf);
> +		if (!cf->is_capsnap)
> +			ceph_free_cap_flush(cf);
>  	}
>  
>  	if (wake_ci)
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index dcb5f34a084b..b9e6a69cc058 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1656,7 +1656,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  		spin_lock(&mdsc->cap_dirty_lock);
>  
>  		list_for_each_entry(cf, &to_remove, i_list)
> -			list_del(&cf->g_list);
> +			list_del_init(&cf->g_list);
>  
>  		if (!list_empty(&ci->i_dirty_item)) {
>  			pr_warn_ratelimited(
> @@ -1710,8 +1710,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  		struct ceph_cap_flush *cf;
>  		cf = list_first_entry(&to_remove,
>  				      struct ceph_cap_flush, i_list);
> -		list_del(&cf->i_list);
> -		ceph_free_cap_flush(cf);
> +		list_del_init(&cf->i_list);
> +		if (!cf->is_capsnap)
> +			ceph_free_cap_flush(cf);
>  	}
>  
>  	wake_up_all(&ci->i_cap_wq);
> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index af502a8245f0..62fab59bbf96 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -487,6 +487,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>  		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
>  		return;
>  	}
> +	capsnap->cap_flush.is_capsnap = true;
>  
>  	spin_lock(&ci->i_ceph_lock);
>  	used = __ceph_caps_used(ci);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 106ddfd1ce92..336350861791 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -186,8 +186,9 @@ struct ceph_cap {
>  
>  struct ceph_cap_flush {
>  	u64 tid;
> -	int caps; /* 0 means capsnap */
> +	int caps;
>  	bool wake; /* wake up flush waiters when finish ? */
> +	bool is_capsnap; /* true means capsnap */
>  	struct list_head g_list; // global
>  	struct list_head i_list; // per inode
>  	struct ceph_inode_info *ci;

Looks good, Xiubo. I'll merge into testing after a bit of local testing
on my part.

I'll plan to mark this one for stable too, but I'll need to look at the
prerequisites as there may be merge conflicts with earlier kernels.
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v4] ceph: correctly release memory from capsnap
  2021-08-18 16:06 ` Jeff Layton
@ 2021-08-18 23:43   ` Xiubo Li
  2021-08-19 10:59     ` Jeff Layton
  0 siblings, 1 reply; 5+ messages in thread
From: Xiubo Li @ 2021-08-18 23:43 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, pdonnell, ceph-devel


On 8/19/21 12:06 AM, Jeff Layton wrote:
> On Wed, 2021-08-18 at 21:38 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> When force umounting, it will try to remove all the session caps.
>> If there has any capsnap is in the flushing list, the remove session
>> caps callback will try to release the capsnap->flush_cap memory to
>> "ceph_cap_flush_cachep" slab cache, while which is allocated from
>> kmalloc-256 slab cache.
>>
>> At the same time switch to list_del_init() because just in case the
>> force umount has removed it from the lists and the
>> handle_cap_flushsnap_ack() comes then the seconds list_del_init()
>> won't crash the kernel.
>>
>> URL: https://tracker.ceph.com/issues/52283
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> Changed in V4:
>> - add a new is_capsnap field in ceph_cap_flush struct.
>>
>>
>>   fs/ceph/caps.c       | 19 ++++++++++++-------
>>   fs/ceph/mds_client.c |  7 ++++---
>>   fs/ceph/snap.c       |  1 +
>>   fs/ceph/super.h      |  3 ++-
>>   4 files changed, 19 insertions(+), 11 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 4663ab830614..52c7026fd0d1 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -1712,7 +1712,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>>   
>>   struct ceph_cap_flush *ceph_alloc_cap_flush(void)
>>   {
>> -	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>> +	struct ceph_cap_flush *cf;
>> +
>> +	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>> +	cf->is_capsnap = false;
>> +	return cf;
>>   }
>>   
>>   void ceph_free_cap_flush(struct ceph_cap_flush *cf)
>> @@ -1747,7 +1751,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
>>   		prev->wake = true;
>>   		wake = false;
>>   	}
>> -	list_del(&cf->g_list);
>> +	list_del_init(&cf->g_list);
>>   	return wake;
>>   }
>>   
>> @@ -1762,7 +1766,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
>>   		prev->wake = true;
>>   		wake = false;
>>   	}
>> -	list_del(&cf->i_list);
>> +	list_del_init(&cf->i_list);
>>   	return wake;
>>   }
>>   
>> @@ -2400,7 +2404,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>>   	ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
>>   
>>   	list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
>> -		if (!cf->caps) {
>> +		if (cf->is_capsnap) {
>>   			last_snap_flush = cf->tid;
>>   			break;
>>   		}
>> @@ -2419,7 +2423,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>>   
>>   		first_tid = cf->tid + 1;
>>   
>> -		if (cf->caps) {
>> +		if (!cf->is_capsnap) {
>>   			struct cap_msg_args arg;
>>   
>>   			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
>> @@ -3568,7 +3572,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>   			cleaned = cf->caps;
>>   
>>   		/* Is this a capsnap? */
>> -		if (cf->caps == 0)
>> +		if (cf->is_capsnap)
>>   			continue;
>>   
>>   		if (cf->tid <= flush_tid) {
>> @@ -3642,7 +3646,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>   		cf = list_first_entry(&to_remove,
>>   				      struct ceph_cap_flush, i_list);
>>   		list_del(&cf->i_list);
>> -		ceph_free_cap_flush(cf);
>> +		if (!cf->is_capsnap)
>> +			ceph_free_cap_flush(cf);
>>   	}
>>   
>>   	if (wake_ci)
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index dcb5f34a084b..b9e6a69cc058 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1656,7 +1656,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   		spin_lock(&mdsc->cap_dirty_lock);
>>   
>>   		list_for_each_entry(cf, &to_remove, i_list)
>> -			list_del(&cf->g_list);
>> +			list_del_init(&cf->g_list);
>>   
>>   		if (!list_empty(&ci->i_dirty_item)) {
>>   			pr_warn_ratelimited(
>> @@ -1710,8 +1710,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   		struct ceph_cap_flush *cf;
>>   		cf = list_first_entry(&to_remove,
>>   				      struct ceph_cap_flush, i_list);
>> -		list_del(&cf->i_list);
>> -		ceph_free_cap_flush(cf);
>> +		list_del_init(&cf->i_list);
>> +		if (!cf->is_capsnap)
>> +			ceph_free_cap_flush(cf);
>>   	}
>>   
>>   	wake_up_all(&ci->i_cap_wq);
>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>> index af502a8245f0..62fab59bbf96 100644
>> --- a/fs/ceph/snap.c
>> +++ b/fs/ceph/snap.c
>> @@ -487,6 +487,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>>   		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
>>   		return;
>>   	}
>> +	capsnap->cap_flush.is_capsnap = true;
>>   
>>   	spin_lock(&ci->i_ceph_lock);
>>   	used = __ceph_caps_used(ci);
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 106ddfd1ce92..336350861791 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -186,8 +186,9 @@ struct ceph_cap {
>>   
>>   struct ceph_cap_flush {
>>   	u64 tid;
>> -	int caps; /* 0 means capsnap */
>> +	int caps;
>>   	bool wake; /* wake up flush waiters when finish ? */
>> +	bool is_capsnap; /* true means capsnap */
>>   	struct list_head g_list; // global
>>   	struct list_head i_list; // per inode
>>   	struct ceph_inode_info *ci;
> Looks good, Xiubo. I'll merge into testing after a bit of local testing
> on my part.
>
> I'll plan to mark this one for stable too, but I'll need to look at the
> prerequisites as there may be merge conflicts with earlier kernels.

I tried it but not all, for some old kernels it may conflict with the 
code only in `__detach_cap_flush_from_mdsc()` and 
`__detach_cap_flush_from_ci()`, which will fold these two funcs into 
__finish_cap_flush().

Thanks

BRs



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v4] ceph: correctly release memory from capsnap
  2021-08-18 23:43   ` Xiubo Li
@ 2021-08-19 10:59     ` Jeff Layton
  2021-08-19 12:40       ` Xiubo Li
  0 siblings, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2021-08-19 10:59 UTC (permalink / raw)
  To: Xiubo Li; +Cc: idryomov, pdonnell, ceph-devel

On Thu, 2021-08-19 at 07:43 +0800, Xiubo Li wrote:
> On 8/19/21 12:06 AM, Jeff Layton wrote:
> > On Wed, 2021-08-18 at 21:38 +0800, xiubli@redhat.com wrote:
> > > From: Xiubo Li <xiubli@redhat.com>
> > > 
> > > When force umounting, it will try to remove all the session caps.
> > > If there has any capsnap is in the flushing list, the remove session
> > > caps callback will try to release the capsnap->flush_cap memory to
> > > "ceph_cap_flush_cachep" slab cache, while which is allocated from
> > > kmalloc-256 slab cache.
> > > 
> > > At the same time switch to list_del_init() because just in case the
> > > force umount has removed it from the lists and the
> > > handle_cap_flushsnap_ack() comes then the seconds list_del_init()
> > > won't crash the kernel.
> > > 
> > > URL: https://tracker.ceph.com/issues/52283
> > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > ---
> > > 
> > > Changed in V4:
> > > - add a new is_capsnap field in ceph_cap_flush struct.
> > > 
> > > 
> > >   fs/ceph/caps.c       | 19 ++++++++++++-------
> > >   fs/ceph/mds_client.c |  7 ++++---
> > >   fs/ceph/snap.c       |  1 +
> > >   fs/ceph/super.h      |  3 ++-
> > >   4 files changed, 19 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > > index 4663ab830614..52c7026fd0d1 100644
> > > --- a/fs/ceph/caps.c
> > > +++ b/fs/ceph/caps.c
> > > @@ -1712,7 +1712,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
> > >   
> > >   struct ceph_cap_flush *ceph_alloc_cap_flush(void)
> > >   {
> > > -	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> > > +	struct ceph_cap_flush *cf;
> > > +
> > > +	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> > > +	cf->is_capsnap = false;
> > > +	return cf;
> > >   }
> > >   
> > >   void ceph_free_cap_flush(struct ceph_cap_flush *cf)
> > > @@ -1747,7 +1751,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
> > >   		prev->wake = true;
> > >   		wake = false;
> > >   	}
> > > -	list_del(&cf->g_list);
> > > +	list_del_init(&cf->g_list);
> > >   	return wake;
> > >   }
> > >   
> > > @@ -1762,7 +1766,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
> > >   		prev->wake = true;
> > >   		wake = false;
> > >   	}
> > > -	list_del(&cf->i_list);
> > > +	list_del_init(&cf->i_list);
> > >   	return wake;
> > >   }
> > >   
> > > @@ -2400,7 +2404,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
> > >   	ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
> > >   
> > >   	list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
> > > -		if (!cf->caps) {
> > > +		if (cf->is_capsnap) {
> > >   			last_snap_flush = cf->tid;
> > >   			break;
> > >   		}
> > > @@ -2419,7 +2423,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
> > >   
> > >   		first_tid = cf->tid + 1;
> > >   
> > > -		if (cf->caps) {
> > > +		if (!cf->is_capsnap) {
> > >   			struct cap_msg_args arg;
> > >   
> > >   			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
> > > @@ -3568,7 +3572,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
> > >   			cleaned = cf->caps;
> > >   
> > >   		/* Is this a capsnap? */
> > > -		if (cf->caps == 0)
> > > +		if (cf->is_capsnap)
> > >   			continue;
> > >   
> > >   		if (cf->tid <= flush_tid) {
> > > @@ -3642,7 +3646,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
> > >   		cf = list_first_entry(&to_remove,
> > >   				      struct ceph_cap_flush, i_list);
> > >   		list_del(&cf->i_list);
> > > -		ceph_free_cap_flush(cf);
> > > +		if (!cf->is_capsnap)
> > > +			ceph_free_cap_flush(cf);
> > >   	}
> > >   
> > >   	if (wake_ci)
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index dcb5f34a084b..b9e6a69cc058 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -1656,7 +1656,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> > >   		spin_lock(&mdsc->cap_dirty_lock);
> > >   
> > >   		list_for_each_entry(cf, &to_remove, i_list)
> > > -			list_del(&cf->g_list);
> > > +			list_del_init(&cf->g_list);
> > >   
> > >   		if (!list_empty(&ci->i_dirty_item)) {
> > >   			pr_warn_ratelimited(
> > > @@ -1710,8 +1710,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> > >   		struct ceph_cap_flush *cf;
> > >   		cf = list_first_entry(&to_remove,
> > >   				      struct ceph_cap_flush, i_list);
> > > -		list_del(&cf->i_list);
> > > -		ceph_free_cap_flush(cf);
> > > +		list_del_init(&cf->i_list);
> > > +		if (!cf->is_capsnap)
> > > +			ceph_free_cap_flush(cf);
> > >   	}
> > >   
> > >   	wake_up_all(&ci->i_cap_wq);
> > > diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> > > index af502a8245f0..62fab59bbf96 100644
> > > --- a/fs/ceph/snap.c
> > > +++ b/fs/ceph/snap.c
> > > @@ -487,6 +487,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
> > >   		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
> > >   		return;
> > >   	}
> > > +	capsnap->cap_flush.is_capsnap = true;
> > >   
> > >   	spin_lock(&ci->i_ceph_lock);
> > >   	used = __ceph_caps_used(ci);
> > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > > index 106ddfd1ce92..336350861791 100644
> > > --- a/fs/ceph/super.h
> > > +++ b/fs/ceph/super.h
> > > @@ -186,8 +186,9 @@ struct ceph_cap {
> > >   
> > >   struct ceph_cap_flush {
> > >   	u64 tid;
> > > -	int caps; /* 0 means capsnap */
> > > +	int caps;
> > >   	bool wake; /* wake up flush waiters when finish ? */
> > > +	bool is_capsnap; /* true means capsnap */
> > >   	struct list_head g_list; // global
> > >   	struct list_head i_list; // per inode
> > >   	struct ceph_inode_info *ci;
> > Looks good, Xiubo. I'll merge into testing after a bit of local testing
> > on my part.
> > 
> > I'll plan to mark this one for stable too, but I'll need to look at the
> > prerequisites as there may be merge conflicts with earlier kernels.
> 
> I tried it but not all, for some old kernels it may conflict with the 
> code only in `__detach_cap_flush_from_mdsc()` and 
> `__detach_cap_flush_from_ci()`, which will fold these two funcs into 
> __finish_cap_flush().
> 

Yeah, there are quite a few merge conflicts on older kernels and pulling
in earlier patches to eliminate them just brings in more conflicts. We
may have to do a custom backport on this one for some of the older
stable kernels.

-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v4] ceph: correctly release memory from capsnap
  2021-08-19 10:59     ` Jeff Layton
@ 2021-08-19 12:40       ` Xiubo Li
  0 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2021-08-19 12:40 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, pdonnell, ceph-devel


On 8/19/21 6:59 PM, Jeff Layton wrote:
> On Thu, 2021-08-19 at 07:43 +0800, Xiubo Li wrote:
>> On 8/19/21 12:06 AM, Jeff Layton wrote:
>>> On Wed, 2021-08-18 at 21:38 +0800, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> When force umounting, it will try to remove all the session caps.
>>>> If there has any capsnap is in the flushing list, the remove session
>>>> caps callback will try to release the capsnap->flush_cap memory to
>>>> "ceph_cap_flush_cachep" slab cache, while which is allocated from
>>>> kmalloc-256 slab cache.
>>>>
>>>> At the same time switch to list_del_init() because just in case the
>>>> force umount has removed it from the lists and the
>>>> handle_cap_flushsnap_ack() comes then the seconds list_del_init()
>>>> won't crash the kernel.
>>>>
>>>> URL: https://tracker.ceph.com/issues/52283
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>
>>>> Changed in V4:
>>>> - add a new is_capsnap field in ceph_cap_flush struct.
>>>>
>>>>
>>>>    fs/ceph/caps.c       | 19 ++++++++++++-------
>>>>    fs/ceph/mds_client.c |  7 ++++---
>>>>    fs/ceph/snap.c       |  1 +
>>>>    fs/ceph/super.h      |  3 ++-
>>>>    4 files changed, 19 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>> index 4663ab830614..52c7026fd0d1 100644
>>>> --- a/fs/ceph/caps.c
>>>> +++ b/fs/ceph/caps.c
>>>> @@ -1712,7 +1712,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>>>>    
>>>>    struct ceph_cap_flush *ceph_alloc_cap_flush(void)
>>>>    {
>>>> -	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>>>> +	struct ceph_cap_flush *cf;
>>>> +
>>>> +	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>>>> +	cf->is_capsnap = false;
>>>> +	return cf;
>>>>    }
>>>>    
>>>>    void ceph_free_cap_flush(struct ceph_cap_flush *cf)
>>>> @@ -1747,7 +1751,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
>>>>    		prev->wake = true;
>>>>    		wake = false;
>>>>    	}
>>>> -	list_del(&cf->g_list);
>>>> +	list_del_init(&cf->g_list);
>>>>    	return wake;
>>>>    }
>>>>    
>>>> @@ -1762,7 +1766,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
>>>>    		prev->wake = true;
>>>>    		wake = false;
>>>>    	}
>>>> -	list_del(&cf->i_list);
>>>> +	list_del_init(&cf->i_list);
>>>>    	return wake;
>>>>    }
>>>>    
>>>> @@ -2400,7 +2404,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>>>>    	ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
>>>>    
>>>>    	list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
>>>> -		if (!cf->caps) {
>>>> +		if (cf->is_capsnap) {
>>>>    			last_snap_flush = cf->tid;
>>>>    			break;
>>>>    		}
>>>> @@ -2419,7 +2423,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
>>>>    
>>>>    		first_tid = cf->tid + 1;
>>>>    
>>>> -		if (cf->caps) {
>>>> +		if (!cf->is_capsnap) {
>>>>    			struct cap_msg_args arg;
>>>>    
>>>>    			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
>>>> @@ -3568,7 +3572,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>>>    			cleaned = cf->caps;
>>>>    
>>>>    		/* Is this a capsnap? */
>>>> -		if (cf->caps == 0)
>>>> +		if (cf->is_capsnap)
>>>>    			continue;
>>>>    
>>>>    		if (cf->tid <= flush_tid) {
>>>> @@ -3642,7 +3646,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>>>    		cf = list_first_entry(&to_remove,
>>>>    				      struct ceph_cap_flush, i_list);
>>>>    		list_del(&cf->i_list);
>>>> -		ceph_free_cap_flush(cf);
>>>> +		if (!cf->is_capsnap)
>>>> +			ceph_free_cap_flush(cf);
>>>>    	}
>>>>    
>>>>    	if (wake_ci)
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index dcb5f34a084b..b9e6a69cc058 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -1656,7 +1656,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>>    		spin_lock(&mdsc->cap_dirty_lock);
>>>>    
>>>>    		list_for_each_entry(cf, &to_remove, i_list)
>>>> -			list_del(&cf->g_list);
>>>> +			list_del_init(&cf->g_list);
>>>>    
>>>>    		if (!list_empty(&ci->i_dirty_item)) {
>>>>    			pr_warn_ratelimited(
>>>> @@ -1710,8 +1710,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>>    		struct ceph_cap_flush *cf;
>>>>    		cf = list_first_entry(&to_remove,
>>>>    				      struct ceph_cap_flush, i_list);
>>>> -		list_del(&cf->i_list);
>>>> -		ceph_free_cap_flush(cf);
>>>> +		list_del_init(&cf->i_list);
>>>> +		if (!cf->is_capsnap)
>>>> +			ceph_free_cap_flush(cf);
>>>>    	}
>>>>    
>>>>    	wake_up_all(&ci->i_cap_wq);
>>>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>>>> index af502a8245f0..62fab59bbf96 100644
>>>> --- a/fs/ceph/snap.c
>>>> +++ b/fs/ceph/snap.c
>>>> @@ -487,6 +487,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>>>>    		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
>>>>    		return;
>>>>    	}
>>>> +	capsnap->cap_flush.is_capsnap = true;
>>>>    
>>>>    	spin_lock(&ci->i_ceph_lock);
>>>>    	used = __ceph_caps_used(ci);
>>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>>>> index 106ddfd1ce92..336350861791 100644
>>>> --- a/fs/ceph/super.h
>>>> +++ b/fs/ceph/super.h
>>>> @@ -186,8 +186,9 @@ struct ceph_cap {
>>>>    
>>>>    struct ceph_cap_flush {
>>>>    	u64 tid;
>>>> -	int caps; /* 0 means capsnap */
>>>> +	int caps;
>>>>    	bool wake; /* wake up flush waiters when finish ? */
>>>> +	bool is_capsnap; /* true means capsnap */
>>>>    	struct list_head g_list; // global
>>>>    	struct list_head i_list; // per inode
>>>>    	struct ceph_inode_info *ci;
>>> Looks good, Xiubo. I'll merge into testing after a bit of local testing
>>> on my part.
>>>
>>> I'll plan to mark this one for stable too, but I'll need to look at the
>>> prerequisites as there may be merge conflicts with earlier kernels.
>> I tried it but not all, for some old kernels it may conflict with the
>> code only in `__detach_cap_flush_from_mdsc()` and
>> `__detach_cap_flush_from_ci()`, which will fold these two funcs into
>> __finish_cap_flush().
>>
> Yeah, there are quite a few merge conflicts on older kernels and pulling
> in earlier patches to eliminate them just brings in more conflicts. We
> may have to do a custom backport on this one for some of the older
> stable kernels.
>
Yeah, agree.



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-08-19 12:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-18 13:38 [PATCH v4] ceph: correctly release memory from capsnap xiubli
2021-08-18 16:06 ` Jeff Layton
2021-08-18 23:43   ` Xiubo Li
2021-08-19 10:59     ` Jeff Layton
2021-08-19 12:40       ` Xiubo Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).