FSTests Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] btrfs: speedup mount time with force readahead chunk tree
@ 2020-07-01  9:24 robbieko
  2020-07-01 10:10 ` Robbie Ko
  2020-07-01 10:58 ` Qu Wenruo
  0 siblings, 2 replies; 4+ messages in thread
From: robbieko @ 2020-07-01  9:24 UTC (permalink / raw)
  To: fstests; +Cc: linux-btrfs, Robbie Ko

From: Robbie Ko <robbieko@synology.com>

When mounting, we always need to read the whole chunk tree.
When there are too many chunk items, most of the time is
spent in btrfs_read_chunk_tree, because we only read one
leaf at a time.

We fix this by adding a new readahead mode READA_FORWARD_FORCE,
which reads all the leaves after the key in the node when
reading a level 1 node.

Signed-off-by: Robbie Ko <robbieko@synology.com>
---
 fs/btrfs/ctree.c   | 7 +++++--
 fs/btrfs/ctree.h   | 2 +-
 fs/btrfs/volumes.c | 1 +
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3a7648bff42c..abb9108e2d7d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2194,7 +2194,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
 			if (nr == 0)
 				break;
 			nr--;
-		} else if (path->reada == READA_FORWARD) {
+		} else if (path->reada == READA_FORWARD || path->reada == READA_FORWARD_FORCE) {
 			nr++;
 			if (nr >= nritems)
 				break;
@@ -2205,12 +2205,15 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
 				break;
 		}
 		search = btrfs_node_blockptr(node, nr);
-		if ((search <= target && target - search <= 65536) ||
+		if ((path->reada == READA_FORWARD_FORCE) ||
+		    (search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			readahead_tree_block(fs_info, search);
 			nread += blocksize;
 		}
 		nscan++;
+		if (path->reada == READA_FORWARD_FORCE)
+			continue;
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d404cce8ae40..808bcbdc9530 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -353,7 +353,7 @@ struct btrfs_node {
  * The slots array records the index of the item or block pointer
  * used while walking the tree.
  */
-enum { READA_NONE, READA_BACK, READA_FORWARD };
+enum { READA_NONE, READA_BACK, READA_FORWARD, READA_FORWARD_FORCE };
 struct btrfs_path {
 	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
 	int slots[BTRFS_MAX_LEVEL];
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0d6e785bcb98..78fd65abff69 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7043,6 +7043,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->reada = READA_FORWARD_FORCE;
 
 	/*
 	 * uuid_mutex is needed only if we are mounting a sprout FS
-- 
2.17.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] btrfs: speedup mount time with force readahead chunk tree
  2020-07-01  9:24 [PATCH] btrfs: speedup mount time with force readahead chunk tree robbieko
@ 2020-07-01 10:10 ` Robbie Ko
  2020-07-01 10:58 ` Qu Wenruo
  1 sibling, 0 replies; 4+ messages in thread
From: Robbie Ko @ 2020-07-01 10:10 UTC (permalink / raw)
  To: fstests

please ignore this.

> From: Robbie Ko <robbieko@synology.com>
>
> When mounting, we always need to read the whole chunk tree,
> when there are too many chunk items, most of the time is
> spent on btrfs_read_chunk_tree, because we only read one
> leaf at a time.
>
> We fix this by adding a new readahead mode READA_FORWARD_FORCE,
> which reads all the leaves after the key in the node when
> reading a level 1 node.
>
> Signed-off-by: Robbie Ko <robbieko@synology.com>
> ---
>   fs/btrfs/ctree.c   | 7 +++++--
>   fs/btrfs/ctree.h   | 2 +-
>   fs/btrfs/volumes.c | 1 +
>   3 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index 3a7648bff42c..abb9108e2d7d 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -2194,7 +2194,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
>   			if (nr == 0)
>   				break;
>   			nr--;
> -		} else if (path->reada == READA_FORWARD) {
> +		} else if (path->reada == READA_FORWARD || path->reada == READA_FORWARD_FORCE) {
>   			nr++;
>   			if (nr >= nritems)
>   				break;
> @@ -2205,12 +2205,15 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
>   				break;
>   		}
>   		search = btrfs_node_blockptr(node, nr);
> -		if ((search <= target && target - search <= 65536) ||
> +		if ((path->reada == READA_FORWARD_FORCE) ||
> +		    (search <= target && target - search <= 65536) ||
>   		    (search > target && search - target <= 65536)) {
>   			readahead_tree_block(fs_info, search);
>   			nread += blocksize;
>   		}
>   		nscan++;
> +		if (path->reada == READA_FORWARD_FORCE)
> +			continue;
>   		if ((nread > 65536 || nscan > 32))
>   			break;
>   	}
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index d404cce8ae40..808bcbdc9530 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -353,7 +353,7 @@ struct btrfs_node {
>    * The slots array records the index of the item or block pointer
>    * used while walking the tree.
>    */
> -enum { READA_NONE, READA_BACK, READA_FORWARD };
> +enum { READA_NONE, READA_BACK, READA_FORWARD, READA_FORWARD_FORCE };
>   struct btrfs_path {
>   	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
>   	int slots[BTRFS_MAX_LEVEL];
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 0d6e785bcb98..78fd65abff69 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -7043,6 +7043,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
>   	path = btrfs_alloc_path();
>   	if (!path)
>   		return -ENOMEM;
> +	path->reada = READA_FORWARD_FORCE;
>   
>   	/*
>   	 * uuid_mutex is needed only if we are mounting a sprout FS

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] btrfs: speedup mount time with force readahead chunk tree
  2020-07-01  9:24 [PATCH] btrfs: speedup mount time with force readahead chunk tree robbieko
  2020-07-01 10:10 ` Robbie Ko
@ 2020-07-01 10:58 ` Qu Wenruo
  2020-07-01 16:05   ` David Sterba
  1 sibling, 1 reply; 4+ messages in thread
From: Qu Wenruo @ 2020-07-01 10:58 UTC (permalink / raw)
  To: robbieko, fstests; +Cc: linux-btrfs

[-- Attachment #1.1: Type: text/plain, Size: 2995 bytes --]



On 2020/7/1 下午5:24, robbieko wrote:
> From: Robbie Ko <robbieko@synology.com>
> 
> When mounting, we always need to read the whole chunk tree,
> when there are too many chunk items, most of the time is
> spent on btrfs_read_chunk_tree, because we only read one
> leaf at a time.

Well, in most cases it would be btrfs_read_block_groups(), unless all
data chunks are very compact with just several large data extents.

> 
> We fix this by adding a new readahead mode READA_FORWARD_FORCE,
> which reads all the leaves after the key in the node when
> reading a level 1 node.
> 
> Signed-off-by: Robbie Ko <robbieko@synology.com>
> ---
>  fs/btrfs/ctree.c   | 7 +++++--
>  fs/btrfs/ctree.h   | 2 +-
>  fs/btrfs/volumes.c | 1 +
>  3 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index 3a7648bff42c..abb9108e2d7d 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -2194,7 +2194,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
>  			if (nr == 0)
>  				break;
>  			nr--;
> -		} else if (path->reada == READA_FORWARD) {
> +		} else if (path->reada == READA_FORWARD || path->reada == READA_FORWARD_FORCE) {
>  			nr++;
>  			if (nr >= nritems)
>  				break;
> @@ -2205,12 +2205,15 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
>  				break;
>  		}
>  		search = btrfs_node_blockptr(node, nr);
> -		if ((search <= target && target - search <= 65536) ||
> +		if ((path->reada == READA_FORWARD_FORCE) ||
> +		    (search <= target && target - search <= 65536) ||
>  		    (search > target && search - target <= 65536)) {
>  			readahead_tree_block(fs_info, search);
>  			nread += blocksize;
>  		}
>  		nscan++;
> +		if (path->reada == READA_FORWARD_FORCE)
> +			continue;
>  		if ((nread > 65536 || nscan > 32))
>  			break;
>  	}
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index d404cce8ae40..808bcbdc9530 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -353,7 +353,7 @@ struct btrfs_node {
>   * The slots array records the index of the item or block pointer
>   * used while walking the tree.
>   */
> -enum { READA_NONE, READA_BACK, READA_FORWARD };
> +enum { READA_NONE, READA_BACK, READA_FORWARD, READA_FORWARD_FORCE };
>  struct btrfs_path {
>  	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
>  	int slots[BTRFS_MAX_LEVEL];
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 0d6e785bcb98..78fd65abff69 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -7043,6 +7043,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
>  	path = btrfs_alloc_path();
>  	if (!path)
>  		return -ENOMEM;
> +	path->reada = READA_FORWARD_FORCE;

Why not just use regular forward readahead?

Would you mind sharing the reason here? Is it just to force readahead for all tree leaves?

Thanks,
Qu

>  
>  	/*
>  	 * uuid_mutex is needed only if we are mounting a sprout FS
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] btrfs: speedup mount time with force readahead chunk tree
  2020-07-01 10:58 ` Qu Wenruo
@ 2020-07-01 16:05   ` David Sterba
  0 siblings, 0 replies; 4+ messages in thread
From: David Sterba @ 2020-07-01 16:05 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: robbieko, fstests, linux-btrfs

On Wed, Jul 01, 2020 at 06:58:55PM +0800, Qu Wenruo wrote:
> 
> 
> On 2020/7/1 下午5:24, robbieko wrote:
> > From: Robbie Ko <robbieko@synology.com>
> > 
> > When mounting, we always need to read the whole chunk tree,
> > when there are too many chunk items, most of the time is
> > spent on btrfs_read_chunk_tree, because we only read one
> > leaf at a time.
> 
> Well, under most case it would be btrfs_read_block_groups(), unless all
> data chunks are very compact with just several large data extents.

I've checked chunk tree on some filesystems:

- 1T, 40% used, chunk tree size 80K, 1 node, the rest are leaves
- 1T, 93% used, chunk tree size 112K, 1 node, the rest are leaves

So yeah, readahead of the chunk tree is not the part that takes long.
For multi-terabyte filesystems it would still be in the range of megabytes,
and the chunk tree is not scattered.

We could do the readahead of block group items; it could speed up some
things and may be worth trying. We have the async readahead API, i.e.
start readahead on a given key and forget about it. Either it will be in
cache by the time we read it, or the proper read will come first.

> > --- a/fs/btrfs/ctree.h
> > +++ b/fs/btrfs/ctree.h
> > @@ -353,7 +353,7 @@ struct btrfs_node {
> >   * The slots array records the index of the item or block pointer
> >   * used while walking the tree.
> >   */
> > -enum { READA_NONE, READA_BACK, READA_FORWARD };
> > +enum { READA_NONE, READA_BACK, READA_FORWARD, READA_FORWARD_FORCE };
> >  struct btrfs_path {
> >  	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
> >  	int slots[BTRFS_MAX_LEVEL];
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 0d6e785bcb98..78fd65abff69 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -7043,6 +7043,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
> >  	path = btrfs_alloc_path();
> >  	if (!path)
> >  		return -ENOMEM;
> > +	path->reada = READA_FORWARD_FORCE;
> 
> Why not just use regular forward readahead?
> 
> Mind to share the reason here? Just to force reada for all tree leaves?

Maybe the current readahead is a good idea to use here anyway; we know
we'll need to read the whole chunk tree, so it's not wasteful.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-01  9:24 [PATCH] btrfs: speedup mount time with force readahead chunk tree robbieko
2020-07-01 10:10 ` Robbie Ko
2020-07-01 10:58 ` Qu Wenruo
2020-07-01 16:05   ` David Sterba

FSTests Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/fstests/0 fstests/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 fstests fstests/ https://lore.kernel.org/fstests \
		fstests@vger.kernel.org
	public-inbox-index fstests

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.fstests


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git