ceph-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] ceph: correctly release memory from capsnap
@ 2021-08-18  1:25 xiubli
  2021-08-18 11:18 ` Ilya Dryomov
  0 siblings, 1 reply; 5+ messages in thread
From: xiubli @ 2021-08-18  1:25 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

When force umounting, a force umount will try to remove all the session
caps. If any capsnap is in the flushing list, the remove session caps
callback will try to release the capsnap->flush_cap memory to the
"ceph_cap_flush_cachep" slab cache, even though it was allocated from
the kmalloc-256 slab cache.

At the same time, switch to list_del_init(): in case the force umount
has already removed it from the lists and handle_cap_flushsnap_ack()
arrives afterwards, the second list_del_init() won't crash the kernel.

URL: https://tracker.ceph.com/issues/52283
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V3:
- rebase to the upstream


 fs/ceph/caps.c       | 18 ++++++++++++++----
 fs/ceph/mds_client.c |  7 ++++---
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1b9ca437da92..e239f06babbc 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1712,7 +1712,16 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
 
 struct ceph_cap_flush *ceph_alloc_cap_flush(void)
 {
-	return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+	struct ceph_cap_flush *cf;
+
+	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+	/*
+	 * caps == 0 always means for the capsnap
+	 * caps > 0 means dirty caps being flushed
+	 * caps == -1 means preallocated, not used yet
+	 */
+	cf->caps = -1;
+	return cf;
 }
 
 void ceph_free_cap_flush(struct ceph_cap_flush *cf)
@@ -1747,7 +1756,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
 		prev->wake = true;
 		wake = false;
 	}
-	list_del(&cf->g_list);
+	list_del_init(&cf->g_list);
 	return wake;
 }
 
@@ -1762,7 +1771,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
 		prev->wake = true;
 		wake = false;
 	}
-	list_del(&cf->i_list);
+	list_del_init(&cf->i_list);
 	return wake;
 }
 
@@ -3642,7 +3651,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 		cf = list_first_entry(&to_remove,
 				      struct ceph_cap_flush, i_list);
 		list_del(&cf->i_list);
-		ceph_free_cap_flush(cf);
+		if (cf->caps)
+			ceph_free_cap_flush(cf);
 	}
 
 	if (wake_ci)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1e013fb09d73..a44adbd1841b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		spin_lock(&mdsc->cap_dirty_lock);
 
 		list_for_each_entry(cf, &to_remove, i_list)
-			list_del(&cf->g_list);
+			list_del_init(&cf->g_list);
 
 		if (!list_empty(&ci->i_dirty_item)) {
 			pr_warn_ratelimited(
@@ -1688,8 +1688,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		struct ceph_cap_flush *cf;
 		cf = list_first_entry(&to_remove,
 				      struct ceph_cap_flush, i_list);
-		list_del(&cf->i_list);
-		ceph_free_cap_flush(cf);
+		list_del_init(&cf->i_list);
+		if (cf->caps)
+			ceph_free_cap_flush(cf);
 	}
 
 	wake_up_all(&ci->i_cap_wq);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ceph: correctly release memory from capsnap
  2021-08-18  1:25 [PATCH v3] ceph: correctly release memory from capsnap xiubli
@ 2021-08-18 11:18 ` Ilya Dryomov
  2021-08-18 12:39   ` Xiubo Li
  2021-08-18 12:41   ` Jeff Layton
  0 siblings, 2 replies; 5+ messages in thread
From: Ilya Dryomov @ 2021-08-18 11:18 UTC (permalink / raw)
  To: Xiubo Li; +Cc: Jeff Layton, Patrick Donnelly, Ceph Development

On Wed, Aug 18, 2021 at 3:25 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> When force umounting, it will try to remove all the session caps.
> If there has any capsnap is in the flushing list, the remove session
> caps callback will try to release the capsnap->flush_cap memory to
> "ceph_cap_flush_cachep" slab cache, while which is allocated from
> kmalloc-256 slab cache.
>
> At the same time switch to list_del_init() because just in case the
> force umount has removed it from the lists and the
> handle_cap_flushsnap_ack() comes then the seconds list_del_init()
> won't crash the kernel.
>
> URL: https://tracker.ceph.com/issues/52283
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V3:
> - rebase to the upstream
>
>
>  fs/ceph/caps.c       | 18 ++++++++++++++----
>  fs/ceph/mds_client.c |  7 ++++---
>  2 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 1b9ca437da92..e239f06babbc 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1712,7 +1712,16 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>
>  struct ceph_cap_flush *ceph_alloc_cap_flush(void)
>  {
> -       return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> +       struct ceph_cap_flush *cf;
> +
> +       cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> +       /*
> +        * caps == 0 always means for the capsnap
> +        * caps > 0 means dirty caps being flushed
> +        * caps == -1 means preallocated, not used yet
> +        */

Hi Xiubo,

This comment should be in super.h, on struct ceph_cap_flush
definition.

But more importantly, are you sure that overloading cf->caps this way
is safe?  For example, __kick_flushing_caps() tests for cf->caps != 0
and cf->caps == -1 would be interpreted as a cue to call __prep_cap().

Thanks,

                Ilya

> +       cf->caps = -1;
> +       return cf;
>  }
>
>  void ceph_free_cap_flush(struct ceph_cap_flush *cf)
> @@ -1747,7 +1756,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
>                 prev->wake = true;
>                 wake = false;
>         }
> -       list_del(&cf->g_list);
> +       list_del_init(&cf->g_list);
>         return wake;
>  }
>
> @@ -1762,7 +1771,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
>                 prev->wake = true;
>                 wake = false;
>         }
> -       list_del(&cf->i_list);
> +       list_del_init(&cf->i_list);
>         return wake;
>  }
>
> @@ -3642,7 +3651,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>                 cf = list_first_entry(&to_remove,
>                                       struct ceph_cap_flush, i_list);
>                 list_del(&cf->i_list);
> -               ceph_free_cap_flush(cf);
> +               if (cf->caps)
> +                       ceph_free_cap_flush(cf);
>         }
>
>         if (wake_ci)
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 1e013fb09d73..a44adbd1841b 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>                 spin_lock(&mdsc->cap_dirty_lock);
>
>                 list_for_each_entry(cf, &to_remove, i_list)
> -                       list_del(&cf->g_list);
> +                       list_del_init(&cf->g_list);
>
>                 if (!list_empty(&ci->i_dirty_item)) {
>                         pr_warn_ratelimited(
> @@ -1688,8 +1688,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>                 struct ceph_cap_flush *cf;
>                 cf = list_first_entry(&to_remove,
>                                       struct ceph_cap_flush, i_list);
> -               list_del(&cf->i_list);
> -               ceph_free_cap_flush(cf);
> +               list_del_init(&cf->i_list);
> +               if (cf->caps)
> +                       ceph_free_cap_flush(cf);
>         }
>
>         wake_up_all(&ci->i_cap_wq);
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ceph: correctly release memory from capsnap
  2021-08-18 11:18 ` Ilya Dryomov
@ 2021-08-18 12:39   ` Xiubo Li
  2021-08-18 12:41   ` Jeff Layton
  1 sibling, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2021-08-18 12:39 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Jeff Layton, Patrick Donnelly, Ceph Development


On 8/18/21 7:18 PM, Ilya Dryomov wrote:
> On Wed, Aug 18, 2021 at 3:25 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> When force umounting, it will try to remove all the session caps.
>> If there has any capsnap is in the flushing list, the remove session
>> caps callback will try to release the capsnap->flush_cap memory to
>> "ceph_cap_flush_cachep" slab cache, while which is allocated from
>> kmalloc-256 slab cache.
>>
>> At the same time switch to list_del_init() because just in case the
>> force umount has removed it from the lists and the
>> handle_cap_flushsnap_ack() comes then the seconds list_del_init()
>> won't crash the kernel.
>>
>> URL: https://tracker.ceph.com/issues/52283
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V3:
>> - rebase to the upstream
>>
>>
>>   fs/ceph/caps.c       | 18 ++++++++++++++----
>>   fs/ceph/mds_client.c |  7 ++++---
>>   2 files changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 1b9ca437da92..e239f06babbc 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -1712,7 +1712,16 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>>
>>   struct ceph_cap_flush *ceph_alloc_cap_flush(void)
>>   {
>> -       return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>> +       struct ceph_cap_flush *cf;
>> +
>> +       cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>> +       /*
>> +        * caps == 0 always means for the capsnap
>> +        * caps > 0 means dirty caps being flushed
>> +        * caps == -1 means preallocated, not used yet
>> +        */
> Hi Xiubo,
>
> This comment should be in super.h, on struct ceph_cap_flush
> definition.
>
> But more importantly, are you sure that overloading cf->caps this way
> is safe?  For example, __kick_flushing_caps() tests for cf->caps != 0
> and cf->caps == -1 would be interpreted as a cue to call __prep_cap().

Hi Ilya,

Yeah, I think it's safe, because once the cf is added to the
ci->i_cap_flush_list in __mark_caps_flushing(), it is guaranteed
that cf->caps will be set to some dirty caps, which must be > 0 or it
will trigger a BUG_ON().

Here in this patch, in remove_session_caps_cb() below, the to_remove list
picks up cfs not only from ci->i_cap_flush_list but also from
ci->i_prealloc_cap_flush, which hasn't been initialized or added to
ci->i_cap_flush_list yet.

Thanks

BRs


>
> Thanks,
>
>                  Ilya
>
>> +       cf->caps = -1;
>> +       return cf;
>>   }
>>
>>   void ceph_free_cap_flush(struct ceph_cap_flush *cf)
>> @@ -1747,7 +1756,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
>>                  prev->wake = true;
>>                  wake = false;
>>          }
>> -       list_del(&cf->g_list);
>> +       list_del_init(&cf->g_list);
>>          return wake;
>>   }
>>
>> @@ -1762,7 +1771,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
>>                  prev->wake = true;
>>                  wake = false;
>>          }
>> -       list_del(&cf->i_list);
>> +       list_del_init(&cf->i_list);
>>          return wake;
>>   }
>>
>> @@ -3642,7 +3651,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>                  cf = list_first_entry(&to_remove,
>>                                        struct ceph_cap_flush, i_list);
>>                  list_del(&cf->i_list);
>> -               ceph_free_cap_flush(cf);
>> +               if (cf->caps)
>> +                       ceph_free_cap_flush(cf);
>>          }
>>
>>          if (wake_ci)
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 1e013fb09d73..a44adbd1841b 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>                  spin_lock(&mdsc->cap_dirty_lock);
>>
>>                  list_for_each_entry(cf, &to_remove, i_list)
>> -                       list_del(&cf->g_list);
>> +                       list_del_init(&cf->g_list);
>>
>>                  if (!list_empty(&ci->i_dirty_item)) {
>>                          pr_warn_ratelimited(
>> @@ -1688,8 +1688,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>                  struct ceph_cap_flush *cf;
>>                  cf = list_first_entry(&to_remove,
>>                                        struct ceph_cap_flush, i_list);
>> -               list_del(&cf->i_list);
>> -               ceph_free_cap_flush(cf);
>> +               list_del_init(&cf->i_list);
>> +               if (cf->caps)
>> +                       ceph_free_cap_flush(cf);
>>          }
>>
>>          wake_up_all(&ci->i_cap_wq);
>> --
>> 2.27.0
>>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ceph: correctly release memory from capsnap
  2021-08-18 11:18 ` Ilya Dryomov
  2021-08-18 12:39   ` Xiubo Li
@ 2021-08-18 12:41   ` Jeff Layton
  2021-08-18 12:58     ` Xiubo Li
  1 sibling, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2021-08-18 12:41 UTC (permalink / raw)
  To: Ilya Dryomov, Xiubo Li; +Cc: Patrick Donnelly, Ceph Development

On Wed, 2021-08-18 at 13:18 +0200, Ilya Dryomov wrote:
> On Wed, Aug 18, 2021 at 3:25 AM <xiubli@redhat.com> wrote:
> > 
> > From: Xiubo Li <xiubli@redhat.com>
> > 
> > When force umounting, it will try to remove all the session caps.
> > If there has any capsnap is in the flushing list, the remove session
> > caps callback will try to release the capsnap->flush_cap memory to
> > "ceph_cap_flush_cachep" slab cache, while which is allocated from
> > kmalloc-256 slab cache.
> > 
> > At the same time switch to list_del_init() because just in case the
> > force umount has removed it from the lists and the
> > handle_cap_flushsnap_ack() comes then the seconds list_del_init()
> > won't crash the kernel.
> > 
> > URL: https://tracker.ceph.com/issues/52283
> > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > ---
> > 
> > V3:
> > - rebase to the upstream
> > 
> > 
> >  fs/ceph/caps.c       | 18 ++++++++++++++----
> >  fs/ceph/mds_client.c |  7 ++++---
> >  2 files changed, 18 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > index 1b9ca437da92..e239f06babbc 100644
> > --- a/fs/ceph/caps.c
> > +++ b/fs/ceph/caps.c
> > @@ -1712,7 +1712,16 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
> > 
> >  struct ceph_cap_flush *ceph_alloc_cap_flush(void)
> >  {
> > -       return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> > +       struct ceph_cap_flush *cf;
> > +
> > +       cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
> > +       /*
> > +        * caps == 0 always means for the capsnap
> > +        * caps > 0 means dirty caps being flushed
> > +        * caps == -1 means preallocated, not used yet
> > +        */
> 
> Hi Xiubo,
> 
> This comment should be in super.h, on struct ceph_cap_flush
> definition.
> 
> But more importantly, are you sure that overloading cf->caps this way
> is safe?  For example, __kick_flushing_caps() tests for cf->caps != 0
> and cf->caps == -1 would be interpreted as a cue to call __prep_cap().
> 
> Thanks,
> 
>                 Ilya
> 

The cf->caps field should get set to a sane value when it goes onto the
i_cap_flush_list, and I don't see how we'd get into testing against the
->caps field before that point. That said, this mechanism does seem a
bit fragile and subject to later breakage, and the caps code is anything
but clear and easy to follow.

pahole says that there is a 3 byte hole just after the "wake" field in
ceph_cap_flush on x86_64, and that's probably true on other arches as
well. Rather than overloading the caps field with this info, you could
add a new bool there to indicate whether it's embedded or not.


> > +       cf->caps = -1;
> > +       return cf;
> >  }
> > 
> >  void ceph_free_cap_flush(struct ceph_cap_flush *cf)
> > @@ -1747,7 +1756,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
> >                 prev->wake = true;
> >                 wake = false;
> >         }
> > -       list_del(&cf->g_list);
> > +       list_del_init(&cf->g_list);
> >         return wake;
> >  }
> > 
> > @@ -1762,7 +1771,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
> >                 prev->wake = true;
> >                 wake = false;
> >         }
> > -       list_del(&cf->i_list);
> > +       list_del_init(&cf->i_list);
> >         return wake;
> >  }
> > 
> > @@ -3642,7 +3651,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
> >                 cf = list_first_entry(&to_remove,
> >                                       struct ceph_cap_flush, i_list);
> >                 list_del(&cf->i_list);
> > -               ceph_free_cap_flush(cf);
> > +               if (cf->caps)
> > +                       ceph_free_cap_flush(cf);
> >         }
> > 
> >         if (wake_ci)
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 1e013fb09d73..a44adbd1841b 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> >                 spin_lock(&mdsc->cap_dirty_lock);
> > 
> >                 list_for_each_entry(cf, &to_remove, i_list)
> > -                       list_del(&cf->g_list);
> > +                       list_del_init(&cf->g_list);
> > 
> >                 if (!list_empty(&ci->i_dirty_item)) {
> >                         pr_warn_ratelimited(
> > @@ -1688,8 +1688,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> >                 struct ceph_cap_flush *cf;
> >                 cf = list_first_entry(&to_remove,
> >                                       struct ceph_cap_flush, i_list);
> > -               list_del(&cf->i_list);
> > -               ceph_free_cap_flush(cf);
> > +               list_del_init(&cf->i_list);
> > +               if (cf->caps)
> > +                       ceph_free_cap_flush(cf);
> >         }
> > 
> >         wake_up_all(&ci->i_cap_wq);
> > --
> > 2.27.0
> > 

-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ceph: correctly release memory from capsnap
  2021-08-18 12:41   ` Jeff Layton
@ 2021-08-18 12:58     ` Xiubo Li
  0 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2021-08-18 12:58 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov; +Cc: Patrick Donnelly, Ceph Development


On 8/18/21 8:41 PM, Jeff Layton wrote:
> On Wed, 2021-08-18 at 13:18 +0200, Ilya Dryomov wrote:
>> On Wed, Aug 18, 2021 at 3:25 AM <xiubli@redhat.com> wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> When force umounting, it will try to remove all the session caps.
>>> If there has any capsnap is in the flushing list, the remove session
>>> caps callback will try to release the capsnap->flush_cap memory to
>>> "ceph_cap_flush_cachep" slab cache, while which is allocated from
>>> kmalloc-256 slab cache.
>>>
>>> At the same time switch to list_del_init() because just in case the
>>> force umount has removed it from the lists and the
>>> handle_cap_flushsnap_ack() comes then the seconds list_del_init()
>>> won't crash the kernel.
>>>
>>> URL: https://tracker.ceph.com/issues/52283
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>
>>> V3:
>>> - rebase to the upstream
>>>
>>>
>>>   fs/ceph/caps.c       | 18 ++++++++++++++----
>>>   fs/ceph/mds_client.c |  7 ++++---
>>>   2 files changed, 18 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>> index 1b9ca437da92..e239f06babbc 100644
>>> --- a/fs/ceph/caps.c
>>> +++ b/fs/ceph/caps.c
>>> @@ -1712,7 +1712,16 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
>>>
>>>   struct ceph_cap_flush *ceph_alloc_cap_flush(void)
>>>   {
>>> -       return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>>> +       struct ceph_cap_flush *cf;
>>> +
>>> +       cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
>>> +       /*
>>> +        * caps == 0 always means for the capsnap
>>> +        * caps > 0 means dirty caps being flushed
>>> +        * caps == -1 means preallocated, not used yet
>>> +        */
>> Hi Xiubo,
>>
>> This comment should be in super.h, on struct ceph_cap_flush
>> definition.
>>
>> But more importantly, are you sure that overloading cf->caps this way
>> is safe?  For example, __kick_flushing_caps() tests for cf->caps != 0
>> and cf->caps == -1 would be interpreted as a cue to call __prep_cap().
>>
>> Thanks,
>>
>>                  Ilya
>>
> The cf->caps field should get set to a sane value when it goes onto the
> i_cap_flush_list, and I don't see how we'd get into testing against the
> ->caps field before that point. That said, this mechanism does seem a
> bit fragile and subject to later breakage, and the caps code is anything
> but clear and easy to follow.
>
> pahole says that there is a 3 byte hole just after the "wake" field in
> ceph_cap_flush on x86_64, and that's probably true on other arches as
> well. Rather than overloading the caps field with this info, you could
> add a new bool there to indicate whether it's embedded or not.

Okay, this also sounds good to me.

I will do that and send out the V4 later.

Thanks


>
>>> +       cf->caps = -1;
>>> +       return cf;
>>>   }
>>>
>>>   void ceph_free_cap_flush(struct ceph_cap_flush *cf)
>>> @@ -1747,7 +1756,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
>>>                  prev->wake = true;
>>>                  wake = false;
>>>          }
>>> -       list_del(&cf->g_list);
>>> +       list_del_init(&cf->g_list);
>>>          return wake;
>>>   }
>>>
>>> @@ -1762,7 +1771,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
>>>                  prev->wake = true;
>>>                  wake = false;
>>>          }
>>> -       list_del(&cf->i_list);
>>> +       list_del_init(&cf->i_list);
>>>          return wake;
>>>   }
>>>
>>> @@ -3642,7 +3651,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>>>                  cf = list_first_entry(&to_remove,
>>>                                        struct ceph_cap_flush, i_list);
>>>                  list_del(&cf->i_list);
>>> -               ceph_free_cap_flush(cf);
>>> +               if (cf->caps)
>>> +                       ceph_free_cap_flush(cf);
>>>          }
>>>
>>>          if (wake_ci)
>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>> index 1e013fb09d73..a44adbd1841b 100644
>>> --- a/fs/ceph/mds_client.c
>>> +++ b/fs/ceph/mds_client.c
>>> @@ -1636,7 +1636,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>                  spin_lock(&mdsc->cap_dirty_lock);
>>>
>>>                  list_for_each_entry(cf, &to_remove, i_list)
>>> -                       list_del(&cf->g_list);
>>> +                       list_del_init(&cf->g_list);
>>>
>>>                  if (!list_empty(&ci->i_dirty_item)) {
>>>                          pr_warn_ratelimited(
>>> @@ -1688,8 +1688,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>                  struct ceph_cap_flush *cf;
>>>                  cf = list_first_entry(&to_remove,
>>>                                        struct ceph_cap_flush, i_list);
>>> -               list_del(&cf->i_list);
>>> -               ceph_free_cap_flush(cf);
>>> +               list_del_init(&cf->i_list);
>>> +               if (cf->caps)
>>> +                       ceph_free_cap_flush(cf);
>>>          }
>>>
>>>          wake_up_all(&ci->i_cap_wq);
>>> --
>>> 2.27.0
>>>


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-08-18 12:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-18  1:25 [PATCH v3] ceph: correctly release memory from capsnap xiubli
2021-08-18 11:18 ` Ilya Dryomov
2021-08-18 12:39   ` Xiubo Li
2021-08-18 12:41   ` Jeff Layton
2021-08-18 12:58     ` Xiubo Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).