All of lore.kernel.org
 help / color / mirror / Atom feed
From: Li GuiFu via Linux-erofs <linux-erofs@lists.ozlabs.org>
To: Hu Weiwen <sehuww@mail.scut.edu.cn>,
	hsiangkao@redhat.com, linux-erofs@lists.ozlabs.org
Subject: Re: [PATCH v2] erofs-utils: use qsort() to sort dir->i_subdirs
Date: Sun, 11 Apr 2021 22:10:09 +0800	[thread overview]
Message-ID: <8f0140a5-c738-7890-eff7-eb877a40035d@aliyun.com> (raw)
In-Reply-To: <20210405093816.149621-1-sehuww@mail.scut.edu.cn>

Hu Weiwen
  It really do a high sort performance increase,
  I have a idea that keeping the erofs_prepare_dir_file() function
paramter stable, Using a independent function to do dirs sort.

On 2021/4/5 17:38, Hu Weiwen wrote:
> Original implementation use insertion sort, and its time complexity is
> O(n^2). This patch use qsort instead. When I create a directory with
> 100k entries, this reduces the user space time from around 3 mins to
> 0.5s.
> 
> Create such a large directory for benchmark with:
> mkdir large; cd large; touch $(seq 100000);
> 
> Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> ---
>  lib/inode.c | 53 +++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 33 insertions(+), 20 deletions(-)
> 
> diff --git a/lib/inode.c b/lib/inode.c
> index d52facf..ef55e88 100644
> --- a/lib/inode.c
> +++ b/lib/inode.c
> @@ -96,21 +96,6 @@ unsigned int erofs_iput(struct erofs_inode *inode)
>  	return 0;
>  }
> 
> -static int dentry_add_sorted(struct erofs_dentry *d, struct list_head *head)
> -{
> -	struct list_head *pos;
> -
> -	list_for_each(pos, head) {
> -		struct erofs_dentry *d2 =
> -			container_of(pos, struct erofs_dentry, d_child);
> -
> -		if (strcmp(d->name, d2->name) < 0)
> -			break;
> -	}
> -	list_add_tail(&d->d_child, pos);
> -	return 0;
> -}
> -
>  struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent,
>  				   const char *name)
>  {
> @@ -122,7 +107,7 @@ struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent,
>  	strncpy(d->name, name, EROFS_NAME_LEN - 1);
>  	d->name[EROFS_NAME_LEN - 1] = '\0';
> 
> -	dentry_add_sorted(d, &parent->i_subdirs);
> +	list_add_tail(&d->d_child, &parent->i_subdirs);
>  	return d;
>  }
> 
> @@ -156,10 +141,19 @@ static int __allocate_inode_bh_data(struct erofs_inode *inode,
>  	return 0;
>  }
> 
> +static int comp_subdir(const void *a, const void *b)
> +{
> +	const struct erofs_dentry *da, *db;
> +
> +	da = *((const struct erofs_dentry **)a);
> +	db = *((const struct erofs_dentry **)b);
> +	return strcmp(da->name, db->name);
> +}
> +
> -int erofs_prepare_dir_file(struct erofs_inode *dir)
> +int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs)
Todo 1: keep these function parameter stable

>  {
> -	struct erofs_dentry *d;
> -	unsigned int d_size, i_nlink;
> +	struct erofs_dentry *d, *n, **sorted_d;
> +	unsigned int d_size, i_nlink, i;
>  	int ret;
> 
>  	/* dot is pointed to the current dir inode */
> @@ -172,6 +166,22 @@ int erofs_prepare_dir_file(struct erofs_inode *dir)
>  	d->inode = erofs_igrab(dir->i_parent);
>  	d->type = EROFS_FT_DIR;
> 
> +	/* sort subdirs */
> +	nr_subdirs += 2;
> +	sorted_d = malloc(nr_subdirs * sizeof(d));
> +	if (!sorted_d)
> +		return -ENOMEM;
> +	i = 0;
> +	list_for_each_entry_safe(d, n, &dir->i_subdirs, d_child) {
> +		list_del(&d->d_child);
> +		sorted_d[i++] = d;
> +	}
> +	DBG_BUGON(i != nr_subdirs);
> +	qsort(sorted_d, nr_subdirs, sizeof(d), comp_subdir);
> +	for (i = 0; i < nr_subdirs; i++)
> +		list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs);
> +	free(sorted_d);
> +
Todo 2: make these codes refact to a independent function


>  	/* let's calculate dir size and update i_nlink */
>  	d_size = 0;
>  	i_nlink = 0;
> @@ -922,6 +932,7 @@ struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir)
>  	DIR *_dir;
>  	struct dirent *dp;
>  	struct erofs_dentry *d;
> +	unsigned int nr_subdirs;
> 
>  	ret = erofs_prepare_xattr_ibody(dir);
>  	if (ret < 0)
> @@ -961,6 +972,7 @@ struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir)
>  		return ERR_PTR(-errno);
>  	}
> 
> +	nr_subdirs = 0;
>  	while (1) {
>  		/*
>  		 * set errno to 0 before calling readdir() in order to
> @@ -984,6 +996,7 @@ struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir)
>  			ret = PTR_ERR(d);
>  			goto err_closedir;
>  		}
> +		nr_subdirs++;
> 
>  		/* to count i_nlink for directories */
>  		d->type = (dp->d_type == DT_DIR ?
> @@ -996,7 +1009,7 @@ struct erofs_inode *erofs_mkfs_build_tree(struct erofs_inode *dir)
>  	}
>  	closedir(_dir);
> 
> -	ret = erofs_prepare_dir_file(dir);
> +	ret = erofs_prepare_dir_file(dir, nr_subdirs);
Todo 3 : nr_subdirs may not be needed, it can be get from one
for-loop-count fixly.
If it may cause perfermance decrease, try to add a dir count in the
erofs_inode struct


>  	if (ret)
>  		goto err;
> 
> --
> 2.25.1
> 

  parent reply	other threads:[~2021-04-11 14:10 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-01 13:52 [PATCH] erofs-utils: use qsort() to sort dir->i_subdirs Hu Weiwen
2021-04-02  2:12 ` Gao Xiang
2021-04-02  2:17   ` Gao Xiang
2021-04-05  9:38     ` [PATCH v2] " Hu Weiwen
2021-04-05 11:24       ` Gao Xiang
2021-04-11 14:10       ` Li GuiFu via Linux-erofs [this message]
2021-04-11 14:41         ` Gao Xiang
2021-04-11 14:56           ` Li GuiFu via Linux-erofs

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8f0140a5-c738-7890-eff7-eb877a40035d@aliyun.com \
    --to=linux-erofs@lists.ozlabs.org \
    --cc=bluce.lee@aliyun.com \
    --cc=hsiangkao@redhat.com \
    --cc=sehuww@mail.scut.edu.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.