From: Hu Weiwen <sehuww@mail.scut.edu.cn>
To: Gao Xiang <hsiangkao@redhat.com>,
Li Guifu <bluce.liguifu@huawei.com>,
Miao Xie <miaoxie@huawei.com>, Fang Wei <fangwei1@huawei.com>
Cc: Hu Weiwen <sehuww@mail.scut.edu.cn>, linux-erofs@lists.ozlabs.org
Subject: [PATCH v2 1/2] erofs-utils: optimize mkfs to O(N), N = #files
Date: Sat, 9 Jan 2021 16:28:09 +0800 [thread overview]
Message-ID: <20210109082810.32169-2-sehuww@mail.scut.edu.cn> (raw)
In-Reply-To: <20210109082810.32169-1-sehuww@mail.scut.edu.cn>
When using EROFS to pack our dataset which consists of millions of
files, mkfs.erofs is very slow compared with mksquashfs.
The bottlenecks are the `erofs_balloc` and `erofs_mapbh` functions, which
iterate over all previously allocated buffer blocks, making the
complexity of the algorithm O(N^2), where N is the number of files.
With this patch:
* a global `last_mapped_block` is maintained to avoid a full scan in the
`erofs_mapbh` function.
* a global `non_full_buffer_blocks` maintains a list of buffer blocks for
each type and each possible number of remaining bytes in the block. It is
then used to identify the most suitable block in future `erofs_balloc`
calls, avoiding a full scan.
Some new data structures are allocated in this patch, so somewhat higher
RAM usage is expected, but not much. When I tested it with the ImageNet
dataset (1.33M files), 7GiB of RAM was consumed, and it took about 4 hours.
Most of the time was spent on IO.
Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
---
include/erofs/cache.h | 1 +
lib/cache.c | 89 ++++++++++++++++++++++++++++++++++++-------
2 files changed, 77 insertions(+), 13 deletions(-)
diff --git a/include/erofs/cache.h b/include/erofs/cache.h
index 8c171f5..b5bf6c0 100644
--- a/include/erofs/cache.h
+++ b/include/erofs/cache.h
@@ -39,6 +39,7 @@ struct erofs_buffer_head {
struct erofs_buffer_block {
struct list_head list;
+ struct list_head nonfull_list;
erofs_blk_t blkaddr;
int type;
diff --git a/lib/cache.c b/lib/cache.c
index 0d5c4a5..aa972d8 100644
--- a/lib/cache.c
+++ b/lib/cache.c
@@ -18,6 +18,15 @@ static struct erofs_buffer_block blkh = {
};
static erofs_blk_t tail_blkaddr;
+/**
+ * Some buffers are not full; we can reuse them to store more data.
+ * 2 for DATA, META
+ * One bucket for each possible number of remaining bytes in the last block
+ **/
+static struct list_head nonfull_bb_buckets[2][EROFS_BLKSIZ];
+
+static struct erofs_buffer_block *last_mapped_block = &blkh;
+
static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
{
return erofs_bh_flush_generic_end(bh);
@@ -62,6 +71,10 @@ struct erofs_bhops erofs_buf_write_bhops = {
/* return buffer_head of erofs super block (with size 0) */
struct erofs_buffer_head *erofs_buffer_init(void)
{
+ for (int i = 0; i < 2; i++)
+ for (int j = 0; j < EROFS_BLKSIZ; j++)
+ init_list_head(&nonfull_bb_buckets[i][j]);
+
struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
if (IS_ERR(bh))
@@ -131,7 +144,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
{
struct erofs_buffer_block *cur, *bb;
struct erofs_buffer_head *bh;
- unsigned int alignsize, used0, usedmax;
+ unsigned int alignsize, used0, usedmax, total_size;
int ret = get_alignsize(type, &type);
@@ -143,7 +156,36 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
usedmax = 0;
bb = NULL;
- list_for_each_entry(cur, &blkh.list, list) {
+ erofs_dbg("balloc size=%lu alignsize=%u used0=%u", size, alignsize, used0);
+ if (used0 == 0 || alignsize == EROFS_BLKSIZ)
+ goto alloc;
+
+ total_size = size + required_ext + inline_ext;
+ if (total_size < EROFS_BLKSIZ) {
+ // Try find a most fit block.
+ DBG_BUGON(type < 0 || type > 1);
+ struct list_head *non_fulls = nonfull_bb_buckets[type];
+ for (struct list_head *r = non_fulls + round_up(total_size, alignsize);
+ r < non_fulls + EROFS_BLKSIZ; r++) {
+ if (!list_empty(r)) {
+ struct erofs_buffer_block *reuse_candidate =
+ list_first_entry(r, struct erofs_buffer_block, nonfull_list);
+ ret = __erofs_battach(reuse_candidate, NULL, size, alignsize,
+ required_ext + inline_ext, true);
+ if (ret >= 0) {
+ bb = reuse_candidate;
+ usedmax = (ret + required_ext) % EROFS_BLKSIZ + inline_ext;
+ }
+ break;
+ }
+ }
+ }
+
+ /* Try reuse last ones, which are not mapped and can be extended */
+ cur = last_mapped_block;
+ if (cur == &blkh)
+ cur = list_next_entry(cur, list);
+ for (; cur != &blkh; cur = list_next_entry(cur, list)) {
unsigned int used_before, used;
used_before = cur->buffers.off % EROFS_BLKSIZ;
@@ -181,16 +223,23 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
}
if (bb) {
+ erofs_dbg("reusing buffer. usedmax=%u", usedmax);
bh = malloc(sizeof(struct erofs_buffer_head));
if (!bh)
return ERR_PTR(-ENOMEM);
+ if (bb->nonfull_list.next != NULL)
+ list_del(&bb->nonfull_list);
goto found;
}
+alloc:
/* allocate a new buffer block */
- if (used0 > EROFS_BLKSIZ)
+ if (used0 > EROFS_BLKSIZ) {
+ erofs_dbg("used0 > EROFS_BLKSIZ");
return ERR_PTR(-ENOSPC);
+ }
+ erofs_dbg("new buffer block");
bb = malloc(sizeof(struct erofs_buffer_block));
if (!bb)
return ERR_PTR(-ENOMEM);
@@ -211,6 +260,14 @@ found:
required_ext + inline_ext, false);
if (ret < 0)
return ERR_PTR(ret);
+ if (ret == 0)
+ bb->nonfull_list.next = bb->nonfull_list.prev = NULL;
+ else {
+ /* This buffer is not full yet, we may reuse it */
+ int reusable_size = EROFS_BLKSIZ - ret - required_ext - inline_ext;
+ list_add_tail(&bb->nonfull_list, &nonfull_bb_buckets[type][reusable_size]);
+ }
+
return bh;
}
@@ -247,8 +304,10 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
{
erofs_blk_t blkaddr;
- if (bb->blkaddr == NULL_ADDR)
+ if (bb->blkaddr == NULL_ADDR) {
bb->blkaddr = tail_blkaddr;
+ last_mapped_block = bb;
+ }
blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
if (blkaddr > tail_blkaddr)
@@ -259,15 +318,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb, bool end)
{
- struct erofs_buffer_block *t, *nt;
-
- if (!bb || bb->blkaddr == NULL_ADDR) {
- list_for_each_entry_safe(t, nt, &blkh.list, list) {
- if (!end && (t == bb || nt == &blkh))
- break;
- (void)__erofs_mapbh(t);
- if (end && t == bb)
- break;
+ DBG_BUGON(!end);
+ struct erofs_buffer_block *t = last_mapped_block;
+ while (1) {
+ t = list_next_entry(t, list);
+ if (t == &blkh) {
+ break;
+ }
+ (void)__erofs_mapbh(t);
+ if (t == bb) {
+ break;
}
}
return tail_blkaddr;
@@ -334,6 +394,9 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
if (!list_empty(&bb->buffers.list))
return;
+ if (bb == last_mapped_block)
+ last_mapped_block = list_prev_entry(bb, list);
+
list_del(&bb->list);
free(bb);
--
2.17.1
next prev parent reply other threads:[~2021-01-09 8:48 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-01 9:11 [PATCH 1/2] erofs-utils: optimize mkfs to O(N), N = #files 胡玮文
2021-01-01 9:50 ` 胡 玮文
2021-01-02 5:09 ` Gao Xiang
2021-01-08 18:15 ` Gao Xiang
2021-01-09 8:28 ` [PATCH v2 0/2] Optimize " Hu Weiwen
2021-01-09 8:28 ` Hu Weiwen [this message]
2021-01-09 8:28 ` [PATCH v2 2/2] erofs-utils: refactor: remove end argument from erofs_mapbh Hu Weiwen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210109082810.32169-2-sehuww@mail.scut.edu.cn \
--to=sehuww@mail.scut.edu.cn \
--cc=bluce.liguifu@huawei.com \
--cc=fangwei1@huawei.com \
--cc=hsiangkao@redhat.com \
--cc=linux-erofs@lists.ozlabs.org \
--cc=miaoxie@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).