* Re: [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic
@ 2021-01-18 12:29 胡玮文
  2021-01-18 13:54 ` Gao Xiang
  0 siblings, 1 reply; 5+ messages in thread
From: 胡玮文 @ 2021-01-18 12:29 UTC (permalink / raw)
  To: Gao Xiang; +Cc: linux-erofs

Hi Xiang,

Thank you; your patch is indeed clearer. Could you explain why you don't like the erofs_dbg() calls I added? I found them very useful when debugging.

I've updated the commit message and included some fixes (see inline) in PATCH v5 (coming soon).

> On 16 Jan 2021, at 14:41, Gao Xiang <hsiangkao@aol.com> wrote:
> 
> From: Hu Weiwen <sehuww@mail.scut.edu.cn>
> 
> When using EROFS to pack our dataset, which consists of millions of
> files, mkfs.erofs is very slow compared with mksquashfs.
> 
> The bottleneck is the `erofs_balloc` and `erofs_mapbh` functions, which
> iterate over all previously allocated buffer blocks, making the
> complexity of the algorithm O(N^2), where N is the number of files.
> 
> With this patch:
> 
> * a global `last_mapped_block` is maintained to avoid a full scan in
> the `erofs_mapbh` function.
> 
> * a global `non_full_buffer_blocks` maintains a list of buffer blocks
> for each type and each possible number of remaining bytes in the
> block. It is then used to identify the most suitable block in future
> `erofs_balloc` calls, avoiding a full scan.
> 
> Some new data structures are allocated in this patch, so more RAM
> usage is expected, but not much. When I tested it with the ImageNet
> dataset (1.33M files), 7GiB of RAM was consumed and it took about
> 4 hours. Most of the time was spent on I/O.
> 
> Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Signed-off-by: Gao Xiang <hsiangkao@aol.com>
> ---
> 
> I've simplified the most-fit finding logic of v3... Since buffers.off
> has to be aligned to alignsize, I think it's better to use buffers.off
> as the index of mapped_buckets rather than the remaining size, as it
> looks more straightforward.
> 
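For what it's worth, the two indexing schemes differ roughly as follows (a sketch only; `non_full_buffer_blocks` is a hypothetical v3-style array named after the commit message, not code from either patch):

	/* v3-style: bucket keyed by the remaining free bytes in the block */
	struct list_head *bkt3 = non_full_buffer_blocks[bb->type]
		+ (EROFS_BLKSIZ - bb->buffers.off % EROFS_BLKSIZ);

	/* v4-style (this patch): bucket keyed by the in-block offset itself,
	 * which is already aligned to alignsize, so no conversion is needed */
	struct list_head *bkt4 = mapped_buckets[bb->type]
		+ bb->buffers.off % EROFS_BLKSIZ;
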
> Also, I found the existing logic handling extended blocks might be
> potentially ineffective as well... we should skip used < used0 only
> after oob (extra blocks are allocated, so don't extend such blocks but
> allocate a new bb instead...). It might be more effective to reuse such
> non-mapped buffer blocks...
> 
> Thanks,
> Gao Xiang
> 
>  include/erofs/cache.h |  1 +
>  lib/cache.c           | 91 +++++++++++++++++++++++++++++++++++++------
>  2 files changed, 81 insertions(+), 11 deletions(-)
> 
> diff --git a/include/erofs/cache.h b/include/erofs/cache.h
> index f8dff67b9736..611ca5b8432b 100644
> --- a/include/erofs/cache.h
> +++ b/include/erofs/cache.h
> @@ -39,6 +39,7 @@ struct erofs_buffer_head {
>  
>  struct erofs_buffer_block {
>  	struct list_head list;
> +	struct list_head mapped_list;
>  
>  	erofs_blk_t blkaddr;
>  	int type;
> diff --git a/lib/cache.c b/lib/cache.c
> index 32a58311f563..a44e140bc77b 100644
> --- a/lib/cache.c
> +++ b/lib/cache.c
> @@ -18,6 +18,11 @@ static struct erofs_buffer_block blkh = {
>  };
>  static erofs_blk_t tail_blkaddr;
>  
> +/* buckets for all mapped buffer blocks to boost up allocation */
> +static struct list_head mapped_buckets[2][EROFS_BLKSIZ];
> +/* last mapped buffer block to accelerate erofs_mapbh() */
> +static struct erofs_buffer_block *last_mapped_block = &blkh;
> +
>  static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
>  {
>  	return erofs_bh_flush_generic_end(bh);
> @@ -62,12 +67,17 @@ struct erofs_bhops erofs_buf_write_bhops = {
>  /* return buffer_head of erofs super block (with size 0) */
>  struct erofs_buffer_head *erofs_buffer_init(void)
>  {
> +	int i, j;
>  	struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
>  
>  	if (IS_ERR(bh))
>  		return bh;
>  
>  	bh->op = &erofs_skip_write_bhops;
> +
> +	for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
> +		for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
> +			init_list_head(&mapped_buckets[i][j]);
>  	return bh;
>  }
> 
> @@ -132,20 +142,55 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>  	struct erofs_buffer_block *cur, *bb;
>  	struct erofs_buffer_head *bh;
>  	unsigned int alignsize, used0, usedmax;
> +	unsigned int used_before, used;
>  
>  	int ret = get_alignsize(type, &type);
>  
>  	if (ret < 0)
>  		return ERR_PTR(ret);
> +
> +	DBG_BUGON(type < 0 || type > META);
>  	alignsize = ret;
>  
>  	used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
>  	usedmax = 0;
>  	bb = NULL;
>  
> -	list_for_each_entry(cur, &blkh.list, list) {
> -		unsigned int used_before, used;
> +	if (!used0 || alignsize == EROFS_BLKSIZ)
> +		goto alloc;
> +
> +	/* try to find a most-fit mapped buffer block first */
> +	for (used_before = EROFS_BLKSIZ; used_before > 1; ) {

I would argue that it is more efficient to start used_before from a tighter initial value, so that buckets which can never fit the request are not probed at all.
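
Something along these lines, perhaps (a sketch only; it assumes the rounddown() helper from defs.h and reuses the alloc label from this patch, with the loop body elided):

	/* no mapped block can host the whole request in-block */
	if (size + required_ext + inline_ext >= EROFS_BLKSIZ)
		goto alloc;

	/* tightest aligned start: any fuller bucket can never fit */
	used_before = rounddown(EROFS_BLKSIZ -
				(size + required_ext + inline_ext), alignsize);
	for (; used_before; --used_before) {
		struct list_head *bt = mapped_buckets[type] + used_before;
		/* ... same bucket probing as in this patch ... */
	}

With a bound like that, the ret < 0 case below becomes impossible, as I note further down.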

> +		struct list_head *bt = mapped_buckets[type] + --used_before;
> +
> +		if (list_empty(bt))
> +			continue;
> +		cur = list_first_entry(bt, struct erofs_buffer_block,
> +				       mapped_list);
> +
> +		/* last mapped block can be extended, don't handle it here */
> +		if (cur == last_mapped_block)
> +			continue;
> +
> +		ret = __erofs_battach(cur, NULL, size, alignsize,
> +				      required_ext + inline_ext, true);
> +		if (ret < 0)

If used_before is chosen properly, this should never fail.
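
In that case the dry-run attach could be asserted rather than silently skipped, e.g. (sketch):

	ret = __erofs_battach(cur, NULL, size, alignsize,
			      required_ext + inline_ext, true);
	/* every visited bucket is known to fit, so this cannot fail */
	DBG_BUGON(ret < 0);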

> +			continue;
> +
> +		/* should contain all data in the current block */
> +		used = ret + required_ext + inline_ext;
> +		DBG_BUGON(used > EROFS_BLKSIZ);
> +
> +		bb = cur;
> +		usedmax = used;
> +		break;
> +	}
>  
> +	/* try to start from the last mapped one, which can be extended */
> +	cur = last_mapped_block;
> +	if (cur == &blkh)
> +		cur = list_next_entry(cur, list);
> +	for (; cur != &blkh; cur = list_next_entry(cur, list)) {
>  		used_before = cur->buffers.off % EROFS_BLKSIZ;
>  
>  		/* skip if buffer block is just full */
> @@ -187,6 +232,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>  		goto found;
>  	}
>  
> +alloc:
>  	/* allocate a new buffer block */
>  	if (used0 > EROFS_BLKSIZ)
>  		return ERR_PTR(-ENOSPC);
> @@ -200,6 +246,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>  	bb->buffers.off = 0;
>  	init_list_head(&bb->buffers.list);
>  	list_add_tail(&bb->list, &blkh.list);
> +	init_list_head(&bb->mapped_list);
>  
>  	bh = malloc(sizeof(struct erofs_buffer_head));
>  	if (!bh) {
> @@ -214,6 +261,18 @@ found:
>  	return bh;
>  }
>  
> +static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
> +{
> +	struct list_head *bkt;
> +
> +	if (bb->blkaddr == NULL_ADDR)
> +		return;
> +
> +	bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
> +	list_del(&bb->mapped_list);
> +	list_add_tail(&bb->mapped_list, bkt);
> +}
> +
>  struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
>  					int type, unsigned int size)
>  {
> @@ -239,6 +298,7 @@ struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
>  		free(nbh);
>  		return ERR_PTR(ret);
>  	}
> +	erofs_bupdate_mapped(bb);

Shouldn't this line go into __erofs_battach()? erofs_balloc() only calls that function, so the buckets would stay out of sync on that path otherwise.
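
That is, something like this (a sketch; the oob/expansion checks of the real __erofs_battach() are elided, and its exact signature may differ):

	static int __erofs_battach(struct erofs_buffer_block *bb,
				   struct erofs_buffer_head *bh,
				   erofs_off_t incr, unsigned int alignsize,
				   unsigned int extrasize, bool dryrun)
	{
		erofs_off_t alignedoffset = roundup(bb->buffers.off, alignsize);

		/* ... out-of-bounds / tail-extension checks elided ... */
		if (!dryrun) {
			if (bh) {
				bh->off = alignedoffset;
				bh->block = bb;
				list_add_tail(&bh->list, &bb->buffers.list);
			}
			bb->buffers.off = alignedoffset + incr;
			/* offsets changed: rehash into the matching bucket */
			erofs_bupdate_mapped(bb);
		}
		return (alignedoffset + incr) % EROFS_BLKSIZ;
	}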

>  	return nbh;
>  
>  }
> @@ -247,8 +307,11 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
>  {
>  	erofs_blk_t blkaddr;
>  
> -	if (bb->blkaddr == NULL_ADDR)
> +	if (bb->blkaddr == NULL_ADDR) {
>  		bb->blkaddr = tail_blkaddr;
> +		last_mapped_block = bb;
> +		erofs_bupdate_mapped(bb);
> +	}
>  
>  	blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
>  	if (blkaddr > tail_blkaddr)
> @@ -259,15 +322,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
>  
>  erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb)
>  {
> -	struct erofs_buffer_block *t, *nt;
> +	struct erofs_buffer_block *t = last_mapped_block;
>  
> -	if (!bb || bb->blkaddr == NULL_ADDR) {
> -		list_for_each_entry_safe(t, nt, &blkh.list, list) {
> -			(void)__erofs_mapbh(t);
> -			if (t == bb)
> -				break;
> -		}
> -	}
> +	do {
> +		t = list_next_entry(t, list);
> +		if (t == &blkh)
> +			break;
> +
> +		DBG_BUGON(t->blkaddr != NULL_ADDR);
> +		(void)__erofs_mapbh(t);
> +	} while (t != bb);
>  	return tail_blkaddr;
>  }
>  
> @@ -309,6 +373,7 @@ bool erofs_bflush(struct erofs_buffer_block *bb)
>  
>  		erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
>  
> +		list_del(&p->mapped_list);
>  		list_del(&p->list);
>  		free(p);
>  	}
> @@ -332,6 +397,10 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
>  	if (!list_empty(&bb->buffers.list))
>  		return;
>  
> +	if (bb == last_mapped_block)
> +		last_mapped_block = list_prev_entry(bb, list);
> +
> +	list_del(&bb->mapped_list);
>  	list_del(&bb->list);
>  	free(bb);
>  
> -- 
> 2.24.0


* [PATCH v3 2/2] erofs-utils: optimize mkfs to O(N), N = #files
@ 2021-01-16  5:04 Gao Xiang via Linux-erofs
  2021-01-16  6:31 ` [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic Gao Xiang via Linux-erofs
  0 siblings, 1 reply; 5+ messages in thread
From: Gao Xiang via Linux-erofs @ 2021-01-16  5:04 UTC (permalink / raw)
  To: linux-erofs; +Cc: Hu Weiwen

From: Hu Weiwen <sehuww@mail.scut.edu.cn>

When using EROFS to pack our dataset, which consists of millions of
files, mkfs.erofs is very slow compared with mksquashfs.

The bottleneck is the `erofs_balloc` and `erofs_mapbh` functions, which
iterate over all previously allocated buffer blocks, making the
complexity of the algorithm O(N^2), where N is the number of files.

With this patch:

* a global `last_mapped_block` is maintained to avoid a full scan in
  the `erofs_mapbh` function.

* a global `non_full_buffer_blocks` maintains a list of buffer blocks
  for each type and each possible number of remaining bytes in the
  block. It is then used to identify the most suitable block in future
  `erofs_balloc` calls, avoiding a full scan.

Some new data structures are allocated in this patch, so more RAM usage
is expected, but not much. When I tested it with the ImageNet dataset
(1.33M files), 7GiB of RAM was consumed and it took about 4 hours. Most
of the time was spent on I/O.

Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao@aol.com>
---
 include/erofs/cache.h |  1 +
 lib/cache.c           | 97 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/include/erofs/cache.h b/include/erofs/cache.h
index f8dff67b9736..611ca5b8432b 100644
--- a/include/erofs/cache.h
+++ b/include/erofs/cache.h
@@ -39,6 +39,7 @@ struct erofs_buffer_head {
 
 struct erofs_buffer_block {
 	struct list_head list;
+	struct list_head mapped_list;
 
 	erofs_blk_t blkaddr;
 	int type;
diff --git a/lib/cache.c b/lib/cache.c
index 32a58311f563..17b2b096632c 100644
--- a/lib/cache.c
+++ b/lib/cache.c
@@ -18,6 +18,11 @@ static struct erofs_buffer_block blkh = {
 };
 static erofs_blk_t tail_blkaddr;
 
+/* buckets for all mapped buffer blocks to boost up allocation */
+static struct list_head mapped_buckets[2][EROFS_BLKSIZ];
+/* last mapped buffer block to accelerate erofs_mapbh() */
+static struct erofs_buffer_block *last_mapped_block = &blkh;
+
 static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
 {
 	return erofs_bh_flush_generic_end(bh);
@@ -62,12 +67,17 @@ struct erofs_bhops erofs_buf_write_bhops = {
 /* return buffer_head of erofs super block (with size 0) */
 struct erofs_buffer_head *erofs_buffer_init(void)
 {
+	int i, j;
 	struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
 
 	if (IS_ERR(bh))
 		return bh;
 
 	bh->op = &erofs_skip_write_bhops;
+
+	for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
+		for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
+			init_list_head(&mapped_buckets[i][j]);
 	return bh;
 }
 
@@ -132,20 +142,61 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
 	struct erofs_buffer_block *cur, *bb;
 	struct erofs_buffer_head *bh;
 	unsigned int alignsize, used0, usedmax;
+	unsigned int used_before, used;
 
 	int ret = get_alignsize(type, &type);
 
 	if (ret < 0)
 		return ERR_PTR(ret);
+
+	DBG_BUGON(type < 0 || type > META);
 	alignsize = ret;
 
 	used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
 	usedmax = 0;
 	bb = NULL;
 
-	list_for_each_entry(cur, &blkh.list, list) {
-		unsigned int used_before, used;
+	if (used0 == 0 || alignsize == EROFS_BLKSIZ)
+		goto alloc;
+
+	/* Try to find a most-fit mapped buffer block first. */
+	for (used_before = 1; used_before < EROFS_BLKSIZ; ++used_before) {
+		struct list_head *bt = mapped_buckets[type] + used_before;
+
+		if (list_empty(bt))
+			continue;
+		cur = list_first_entry(bt, struct erofs_buffer_block,
+				       mapped_list);
+
+		ret = __erofs_battach(cur, NULL, size, alignsize,
+				      required_ext + inline_ext, true);
+		if (ret < 0)
+			continue;
+
+		used = (ret + required_ext) % EROFS_BLKSIZ + inline_ext;
+
+		/* should contain inline data in current block */
+		if (used > EROFS_BLKSIZ)
+			continue;
+
+		/*
+		 * remaining should be smaller than before or
+		 * larger than allocating a new buffer block
+		 */
+		if (used < used_before && used < used0)
+			continue;
+
+		if (usedmax < used) {
+			bb = cur;
+			usedmax = used;
+		}
+	}
 
+	/* try to start from the last mapped one, which can be extended */
+	cur = last_mapped_block;
+	if (cur == &blkh)
+		cur = list_next_entry(cur, list);
+	for (; cur != &blkh; cur = list_next_entry(cur, list)) {
 		used_before = cur->buffers.off % EROFS_BLKSIZ;
 
 		/* skip if buffer block is just full */
@@ -187,6 +238,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
 		goto found;
 	}
 
+alloc:
 	/* allocate a new buffer block */
 	if (used0 > EROFS_BLKSIZ)
 		return ERR_PTR(-ENOSPC);
@@ -200,6 +252,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
 	bb->buffers.off = 0;
 	init_list_head(&bb->buffers.list);
 	list_add_tail(&bb->list, &blkh.list);
+	init_list_head(&bb->mapped_list);
 
 	bh = malloc(sizeof(struct erofs_buffer_head));
 	if (!bh) {
@@ -214,6 +267,18 @@ found:
 	return bh;
 }
 
+static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
+{
+	struct list_head *bkt;
+
+	if (bb->blkaddr == NULL_ADDR)
+		return;
+
+	bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
+	list_del(&bb->mapped_list);
+	list_add_tail(&bb->mapped_list, bkt);
+}
+
 struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
 					int type, unsigned int size)
 {
@@ -239,6 +304,7 @@ struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
 		free(nbh);
 		return ERR_PTR(ret);
 	}
+	erofs_bupdate_mapped(bb);
 	return nbh;
 
 }
@@ -247,8 +313,11 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
 {
 	erofs_blk_t blkaddr;
 
-	if (bb->blkaddr == NULL_ADDR)
+	if (bb->blkaddr == NULL_ADDR) {
 		bb->blkaddr = tail_blkaddr;
+		last_mapped_block = bb;
+		erofs_bupdate_mapped(bb);
+	}
 
 	blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
 	if (blkaddr > tail_blkaddr)
@@ -259,15 +328,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
 
 erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb)
 {
-	struct erofs_buffer_block *t, *nt;
+	struct erofs_buffer_block *t = last_mapped_block;
 
-	if (!bb || bb->blkaddr == NULL_ADDR) {
-		list_for_each_entry_safe(t, nt, &blkh.list, list) {
-			(void)__erofs_mapbh(t);
-			if (t == bb)
-				break;
-		}
-	}
+	do {
+		t = list_next_entry(t, list);
+		if (t == &blkh)
+			break;
+
+		DBG_BUGON(t->blkaddr != NULL_ADDR);
+		(void)__erofs_mapbh(t);
+	} while (t != bb);
 	return tail_blkaddr;
 }
 
@@ -309,6 +379,7 @@ bool erofs_bflush(struct erofs_buffer_block *bb)
 
 		erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
 
+		list_del(&p->mapped_list);
 		list_del(&p->list);
 		free(p);
 	}
@@ -332,6 +403,10 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
 	if (!list_empty(&bb->buffers.list))
 		return;
 
+	if (bb == last_mapped_block)
+		last_mapped_block = list_prev_entry(bb, list);
+
+	list_del(&bb->mapped_list);
 	list_del(&bb->list);
 	free(bb);
 
-- 
2.24.0

