All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ceph: eliminate the recursion when rebuilding the snap context
@ 2022-02-16  5:43 xiubli
  2022-02-16 14:13 ` Jeff Layton
  0 siblings, 1 reply; 3+ messages in thread
From: xiubli @ 2022-02-16  5:43 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Use a list instead of recuresion to avoid possible stack overflow.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/snap.c  | 44 +++++++++++++++++++++++++++++++++++---------
 fs/ceph/super.h |  2 ++
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 6939307d41cb..808add7dca9e 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -319,7 +319,8 @@ static int cmpu64_rev(const void *a, const void *b)
  * build the snap context for a given realm.
  */
 static int build_snap_context(struct ceph_snap_realm *realm,
-			      struct list_head* dirty_realms)
+			      struct list_head *realm_queue,
+			      struct list_head *dirty_realms)
 {
 	struct ceph_snap_realm *parent = realm->parent;
 	struct ceph_snap_context *snapc;
@@ -333,9 +334,9 @@ static int build_snap_context(struct ceph_snap_realm *realm,
 	 */
 	if (parent) {
 		if (!parent->cached_context) {
-			err = build_snap_context(parent, dirty_realms);
-			if (err)
-				goto fail;
+			/* add to the queue head */
+			list_add(&parent->rebuild_item, realm_queue);
+			return 1;
 		}
 		num += parent->cached_context->num_snaps;
 	}
@@ -418,13 +419,38 @@ static int build_snap_context(struct ceph_snap_realm *realm,
 static void rebuild_snap_realms(struct ceph_snap_realm *realm,
 				struct list_head *dirty_realms)
 {
-	struct ceph_snap_realm *child;
+	LIST_HEAD(realm_queue);
+	int last = 0;
 
-	dout("%s %llx %p\n", __func__, realm->ino, realm);
-	build_snap_context(realm, dirty_realms);
+	list_add_tail(&realm->rebuild_item, &realm_queue);
+
+	while (!list_empty(&realm_queue)) {
+		struct ceph_snap_realm *_realm, *child;
+
+		/*
+		 * If the last building failed dues to memory
+		 * issue, just empty the realm_queue and return
+		 * to avoid infinite loop.
+		 */
+		if (last < 0) {
+			list_del(&_realm->rebuild_item);
+			continue;
+		}
+
+		_realm = list_first_entry(&realm_queue,
+					  struct ceph_snap_realm,
+					  rebuild_item);
+		last = build_snap_context(_realm, &realm_queue, dirty_realms);
+		dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm,
+		     last > 0 ? "is deferred" : !last ? "succeeded" : "failed");
 
-	list_for_each_entry(child, &realm->children, child_item)
-		rebuild_snap_realms(child, dirty_realms);
+		list_for_each_entry(child, &_realm->children, child_item)
+			list_add_tail(&child->rebuild_item, &realm_queue);
+
+		/* last == 1 means need to build parent first */
+		if (last <= 0)
+			list_del(&_realm->rebuild_item);
+	}
 }
 
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a17bd01a8957..baac800a6d11 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -885,6 +885,8 @@ struct ceph_snap_realm {
 
 	struct list_head dirty_item;     /* if realm needs new context */
 
+	struct list_head rebuild_item;   /* rebuild snap realms _downward_ in hierarchy */
+
 	/* the current set of snaps for this realm */
 	struct ceph_snap_context *cached_context;
 
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] ceph: eliminate the recursion when rebuilding the snap context
  2022-02-16  5:43 [PATCH] ceph: eliminate the recursion when rebuilding the snap context xiubli
@ 2022-02-16 14:13 ` Jeff Layton
  2022-02-17  2:30   ` Xiubo Li
  0 siblings, 1 reply; 3+ messages in thread
From: Jeff Layton @ 2022-02-16 14:13 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, vshankar, ceph-devel

On Wed, 2022-02-16 at 13:43 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Use a list instead of recuresion to avoid possible stack overflow.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/snap.c  | 44 +++++++++++++++++++++++++++++++++++---------
>  fs/ceph/super.h |  2 ++
>  2 files changed, 37 insertions(+), 9 deletions(-)
> 

Thanks Xiubo. This seems sane to me.

> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index 6939307d41cb..808add7dca9e 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -319,7 +319,8 @@ static int cmpu64_rev(const void *a, const void *b)
>   * build the snap context for a given realm.
>   */
>  static int build_snap_context(struct ceph_snap_realm *realm,
> -			      struct list_head* dirty_realms)
> +			      struct list_head *realm_queue,
> +			      struct list_head *dirty_realms)
>  {
>  	struct ceph_snap_realm *parent = realm->parent;
>  	struct ceph_snap_context *snapc;
> @@ -333,9 +334,9 @@ static int build_snap_context(struct ceph_snap_realm *realm,
>  	 */
>  	if (parent) {
>  		if (!parent->cached_context) {
> -			err = build_snap_context(parent, dirty_realms);
> -			if (err)
> -				goto fail;
> +			/* add to the queue head */
> +			list_add(&parent->rebuild_item, realm_queue);
> +			return 1;
>  		}
>  		num += parent->cached_context->num_snaps;
>  	}
> @@ -418,13 +419,38 @@ static int build_snap_context(struct ceph_snap_realm *realm,
>  static void rebuild_snap_realms(struct ceph_snap_realm *realm,
>  				struct list_head *dirty_realms)
>  {
> -	struct ceph_snap_realm *child;
> +	LIST_HEAD(realm_queue);
> +	int last = 0;
>  
> -	dout("%s %llx %p\n", __func__, realm->ino, realm);
> -	build_snap_context(realm, dirty_realms);
> +	list_add_tail(&realm->rebuild_item, &realm_queue);
> +
> +	while (!list_empty(&realm_queue)) {
> +		struct ceph_snap_realm *_realm, *child;
> +
> +		/*
> +		 * If the last building failed dues to memory
> +		 * issue, just empty the realm_queue and return
> +		 * to avoid infinite loop.
> +		 */
> +		if (last < 0) {
> +			list_del(&_realm->rebuild_item);
> +			continue;
> +		}

So if this ends up happening, then we'll just end up silently returning
and not report anything to the console. Should we pr_warn or something
instead? We could make rebuild_snap_realms be an int return function,
and have it trigger the pr_err in ceph_update_snap_trace. That message
is pretty cryptic though.


It seems like the realm hierarchy would be FUBAR at this point. What's
the likely effect if that happens? Are there any steps an admin could
take to try and rescue things (maybe after freeing some memory on the
box)?

> +
> +		_realm = list_first_entry(&realm_queue,
> +					  struct ceph_snap_realm,
> +					  rebuild_item);
> +		last = build_snap_context(_realm, &realm_queue, dirty_realms);
> +		dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm,
> +		     last > 0 ? "is deferred" : !last ? "succeeded" : "failed");
>  
> -	list_for_each_entry(child, &realm->children, child_item)
> -		rebuild_snap_realms(child, dirty_realms);
> +		list_for_each_entry(child, &_realm->children, child_item)
> +			list_add_tail(&child->rebuild_item, &realm_queue);
> +
> +		/* last == 1 means need to build parent first */
> +		if (last <= 0)
> +			list_del(&_realm->rebuild_item);
> +	}
>  }
>  
>  
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index a17bd01a8957..baac800a6d11 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -885,6 +885,8 @@ struct ceph_snap_realm {
>  
>  	struct list_head dirty_item;     /* if realm needs new context */
>  
> +	struct list_head rebuild_item;   /* rebuild snap realms _downward_ in hierarchy */
> +
>  	/* the current set of snaps for this realm */
>  	struct ceph_snap_context *cached_context;
>  

I'll plan to merge this into testing branch and do some testing with it,
but I wouldn't mind a v2 or follow-on patch that clarifies what can be
done if build_snap_context fails.

Thanks,
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] ceph: eliminate the recursion when rebuilding the snap context
  2022-02-16 14:13 ` Jeff Layton
@ 2022-02-17  2:30   ` Xiubo Li
  0 siblings, 0 replies; 3+ messages in thread
From: Xiubo Li @ 2022-02-17  2:30 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, ceph-devel


On 2/16/22 10:13 PM, Jeff Layton wrote:
> On Wed, 2022-02-16 at 13:43 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Use a list instead of recuresion to avoid possible stack overflow.
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/snap.c  | 44 +++++++++++++++++++++++++++++++++++---------
>>   fs/ceph/super.h |  2 ++
>>   2 files changed, 37 insertions(+), 9 deletions(-)
>>
> Thanks Xiubo. This seems sane to me.
>
>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>> index 6939307d41cb..808add7dca9e 100644
>> --- a/fs/ceph/snap.c
>> +++ b/fs/ceph/snap.c
>> @@ -319,7 +319,8 @@ static int cmpu64_rev(const void *a, const void *b)
>>    * build the snap context for a given realm.
>>    */
>>   static int build_snap_context(struct ceph_snap_realm *realm,
>> -			      struct list_head* dirty_realms)
>> +			      struct list_head *realm_queue,
>> +			      struct list_head *dirty_realms)
>>   {
>>   	struct ceph_snap_realm *parent = realm->parent;
>>   	struct ceph_snap_context *snapc;
>> @@ -333,9 +334,9 @@ static int build_snap_context(struct ceph_snap_realm *realm,
>>   	 */
>>   	if (parent) {
>>   		if (!parent->cached_context) {
>> -			err = build_snap_context(parent, dirty_realms);
>> -			if (err)
>> -				goto fail;
>> +			/* add to the queue head */
>> +			list_add(&parent->rebuild_item, realm_queue);
>> +			return 1;
>>   		}
>>   		num += parent->cached_context->num_snaps;
>>   	}
>> @@ -418,13 +419,38 @@ static int build_snap_context(struct ceph_snap_realm *realm,
>>   static void rebuild_snap_realms(struct ceph_snap_realm *realm,
>>   				struct list_head *dirty_realms)
>>   {
>> -	struct ceph_snap_realm *child;
>> +	LIST_HEAD(realm_queue);
>> +	int last = 0;
>>   
>> -	dout("%s %llx %p\n", __func__, realm->ino, realm);
>> -	build_snap_context(realm, dirty_realms);
>> +	list_add_tail(&realm->rebuild_item, &realm_queue);
>> +
>> +	while (!list_empty(&realm_queue)) {
>> +		struct ceph_snap_realm *_realm, *child;
>> +
>> +		/*
>> +		 * If the last building failed dues to memory
>> +		 * issue, just empty the realm_queue and return
>> +		 * to avoid infinite loop.
>> +		 */
>> +		if (last < 0) {
>> +			list_del(&_realm->rebuild_item);
>> +			continue;
>> +		}
> So if this ends up happening, then we'll just end up silently returning
> and not report anything to the console. Should we pr_warn or something
> instead?

Maybe a warning once on this case ? Or should we print all of the realm 
info in the warning logs ?

See my following comments, if we need to fix it I could add this in the 
following patches, and there have some other places need to do the same too.

> We could make rebuild_snap_realms be an int return function,
> and have it trigger the pr_err in ceph_update_snap_trace. That message
> is pretty cryptic though.
>
>
> It seems like the realm hierarchy would be FUBAR at this point. What's
> the likely effect if that happens? Are there any steps an admin could
> take to try and rescue things (maybe after freeing some memory on the
> box)?

 From doc 
https://docs.ceph.com/en/latest/dev/cephfs-snapshots/#snapshot-writeback.

As my understanding we should just make the whole kclient mount or 
related directory to be readonly and at least shouldn't allow any new 
write in this case. Or since we cannot generate CapSnaps for the inodes, 
so we cannot block fresh data writes ?

The whole kclient does nothing in this case in other places. I checked 
the libcephfs code, it juse call ceph_assert() or throws a 
std::bad_alloc exception to abort the daemon.

IMO we should fix it in kclient. Do you think this makes sense ? If so I 
can do that in a separate patch later. For this patch since it intend to 
eliminate the recursion I will keep this code as it was and there has 
some other places need to be fixed too.

-- Xiubo


>> +
>> +		_realm = list_first_entry(&realm_queue,
>> +					  struct ceph_snap_realm,
>> +					  rebuild_item);
>> +		last = build_snap_context(_realm, &realm_queue, dirty_realms);
>> +		dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm,
>> +		     last > 0 ? "is deferred" : !last ? "succeeded" : "failed");
>>   
>> -	list_for_each_entry(child, &realm->children, child_item)
>> -		rebuild_snap_realms(child, dirty_realms);
>> +		list_for_each_entry(child, &_realm->children, child_item)
>> +			list_add_tail(&child->rebuild_item, &realm_queue);
>> +
>> +		/* last == 1 means need to build parent first */
>> +		if (last <= 0)
>> +			list_del(&_realm->rebuild_item);
>> +	}
>>   }
>>   
>>   
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index a17bd01a8957..baac800a6d11 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -885,6 +885,8 @@ struct ceph_snap_realm {
>>   
>>   	struct list_head dirty_item;     /* if realm needs new context */
>>   
>> +	struct list_head rebuild_item;   /* rebuild snap realms _downward_ in hierarchy */
>> +
>>   	/* the current set of snaps for this realm */
>>   	struct ceph_snap_context *cached_context;
>>   
> I'll plan to merge this into testing branch and do some testing with it,
> but I wouldn't mind a v2 or follow-on patch that clarifies what can be
> done if build_snap_context fails.
>
> Thanks,


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-02-17  2:30 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-16  5:43 [PATCH] ceph: eliminate the recursion when rebuilding the snap context xiubli
2022-02-16 14:13 ` Jeff Layton
2022-02-17  2:30   ` Xiubo Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.