* Re: [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic
@ 2021-01-18 12:29 胡玮文
  2021-01-18 13:54 ` Gao Xiang
  0 siblings, 1 reply; 5+ messages in thread
From: 胡玮文 @ 2021-01-18 12:29 UTC (permalink / raw)
  To: Gao Xiang; +Cc: linux-erofs

Hi Xiang,

Thank you, your patch is indeed clearer. Could you explain why you don’t like my added erofs_dbg calls? I find them very useful when debugging.

I’ve updated the commit message with some fixes (see inline) for PATCH v5 (coming soon).
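
For anyone following the thread, here is the core idea of the buckets
in this patch, heavily simplified (find_most_fit() is a made-up name
for illustration only; alignment handling and the special-cased last
mapped block are omitted, and last_mapped_block plays the analogous
scan-avoiding role for erofs_mapbh()):

	static struct list_head mapped_buckets[2][EROFS_BLKSIZ];

	/* a mapped buffer block whose last block has `off` bytes in use
	 * sits on mapped_buckets[type][off % EROFS_BLKSIZ]; walk the
	 * buckets from fullest to emptiest and take the first block that
	 * can still hold `needed` more bytes, with no full list scan */
	static struct erofs_buffer_block *find_most_fit(int type,
							unsigned int needed)
	{
		unsigned int used_before;

		if (!needed || needed >= EROFS_BLKSIZ)
			return NULL;	/* fall back to a fresh buffer block */

		for (used_before = EROFS_BLKSIZ - needed + 1; used_before > 1; ) {
			struct list_head *bt = mapped_buckets[type] + --used_before;

			if (!list_empty(bt))
				return list_first_entry(bt,
							struct erofs_buffer_block,
							mapped_list);
		}
		return NULL;
	}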

> On Jan 16, 2021, at 14:41, Gao Xiang <hsiangkao@aol.com> wrote:
> 
> From: Hu Weiwen <sehuww@mail.scut.edu.cn>
> 
> When using EROFS to pack our dataset, which consists of millions of
> files, mkfs.erofs is very slow compared with mksquashfs.
> 
> The bottlenecks are the `erofs_balloc` and `erofs_mapbh` functions,
> which iterate over all previously allocated buffer blocks, making the
> complexity of the algorithm O(N^2), where N is the number of files.
> 
> With this patch:
> 
> * a global `last_mapped_block` is maintained to avoid a full scan in
> the `erofs_mapbh` function.
> 
> * a global `non_full_buffer_blocks` maintains a list of buffer blocks
> for each type and each possible number of remaining bytes in the
> block. It is then used to find the most suitable block in future
> `erofs_balloc` calls, avoiding a full scan.
> 
> Some new data structures are allocated in this patch, so more RAM
> usage is expected, but not much. When I tested it with the ImageNet
> dataset (1.33M files), 7GiB of RAM was consumed and it took about
> 4 hours. Most of the time was spent on IO.
> 
> Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Signed-off-by: Gao Xiang <hsiangkao@aol.com>
> ---
> 
> I've simplified the most-fit finding logic of v3... Since buffers.off
> has to be aligned to alignsize, I think it's better to use buffers.off
> as the index of mapped_buckets rather than the remaining size, as it
> looks more straightforward.
> 
> Also, I found the existing logic for handling expanded blocks might be
> potentially ineffective as well... we have to skip used < used0 only
> after oob (extra blocks are allocated, so don't expand such blocks but
> allocate a new bb...). It might be more effective to reuse such
> non-mapped buffer blocks...
> 
> Thanks,
> Gao Xiang
> 
> include/erofs/cache.h |  1 +
> lib/cache.c           | 91 +++++++++++++++++++++++++++++++++++++------
> 2 files changed, 81 insertions(+), 11 deletions(-)
> 
> diff --git a/include/erofs/cache.h b/include/erofs/cache.h
> index f8dff67b9736..611ca5b8432b 100644
> --- a/include/erofs/cache.h
> +++ b/include/erofs/cache.h
> @@ -39,6 +39,7 @@ struct erofs_buffer_head {
> 
> struct erofs_buffer_block {
> struct list_head list;
> +    struct list_head mapped_list;
> 
> erofs_blk_t blkaddr;
> int type;
> diff --git a/lib/cache.c b/lib/cache.c
> index 32a58311f563..a44e140bc77b 100644
> --- a/lib/cache.c
> +++ b/lib/cache.c
> @@ -18,6 +18,11 @@ static struct erofs_buffer_block blkh = {
> };
> static erofs_blk_t tail_blkaddr;
> 
> +/* buckets for all mapped buffer blocks to boost up allocation */
> +static struct list_head mapped_buckets[2][EROFS_BLKSIZ];
> +/* last mapped buffer block to accelerate erofs_mapbh() */
> +static struct erofs_buffer_block *last_mapped_block = &blkh;
> +
> static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
> {
> return erofs_bh_flush_generic_end(bh);
> @@ -62,12 +67,17 @@ struct erofs_bhops erofs_buf_write_bhops = {
> /* return buffer_head of erofs super block (with size 0) */
> struct erofs_buffer_head *erofs_buffer_init(void)
> {
> +    int i, j;
> struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
> 
> if (IS_ERR(bh))
>   return bh;
> 
> bh->op = &erofs_skip_write_bhops;
> +
> +    for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
> +        for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
> +            init_list_head(&mapped_buckets[i][j]);
> return bh;
> }
> 
> @@ -132,20 +142,55 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> struct erofs_buffer_block *cur, *bb;
> struct erofs_buffer_head *bh;
> unsigned int alignsize, used0, usedmax;
> +    unsigned int used_before, used;
> 
> int ret = get_alignsize(type, &type);
> 
> if (ret < 0)
>   return ERR_PTR(ret);
> +
> +    DBG_BUGON(type < 0 || type > META);
> alignsize = ret;
> 
> used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
> usedmax = 0;
> bb = NULL;
> 
> -    list_for_each_entry(cur, &blkh.list, list) {
> -        unsigned int used_before, used;
> +    if (!used0 || alignsize == EROFS_BLKSIZ)
> +        goto alloc;
> +
> +    /* try to find a most-fit mapped buffer block first */
> +    for (used_before = EROFS_BLKSIZ; used_before > 1; ) {

I would argue that it would be more efficient to use a tighter initial value for used_before.
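
For example (a rough, untested sketch; used0 is already computed above,
and alignment still has to be re-checked by the __erofs_battach() dry
run, so this only prunes buckets that can never fit):

	/* buckets with used_before + used0 > EROFS_BLKSIZ can never hold
	 * the new data (assuming it fits into one block), so skip them */
	for (used_before = EROFS_BLKSIZ - used0 + 1; used_before > 1; ) {
		struct list_head *bt = mapped_buckets[type] + --used_before;

		if (list_empty(bt))
			continue;
		/* ... same most-fit logic as in the patch ... */
	}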

> +        struct list_head *bt = mapped_buckets[type] + --used_before;
> +
> +        if (list_empty(bt))
> +            continue;
> +        cur = list_first_entry(bt, struct erofs_buffer_block,
> +                       mapped_list);
> +
> +        /* last mapped block can be expanded, don't handle it here */
> +        if (cur == last_mapped_block)
> +            continue;
> +
> +        ret = __erofs_battach(cur, NULL, size, alignsize,
> +                      required_ext + inline_ext, true);
> +        if (ret < 0)

If used_before is chosen properly, this should never fail.

> +            continue;
> +
> +        /* should contain all data in the current block */
> +        used = ret + required_ext + inline_ext;
> +        DBG_BUGON(used > EROFS_BLKSIZ);
> +
> +        bb = cur;
> +        usedmax = used;
> +        break;
> +    }
> 
> +    /* try to start from the last mapped one, which can be expanded */
> +    cur = last_mapped_block;
> +    if (cur == &blkh)
> +        cur = list_next_entry(cur, list);
> +    for (; cur != &blkh; cur = list_next_entry(cur, list)) {
>   used_before = cur->buffers.off % EROFS_BLKSIZ;
> 
>   /* skip if buffer block is just full */
> @@ -187,6 +232,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>   goto found;
> }
> 
> +alloc:
> /* allocate a new buffer block */
> if (used0 > EROFS_BLKSIZ)
>   return ERR_PTR(-ENOSPC);
> @@ -200,6 +246,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> bb->buffers.off = 0;
> init_list_head(&bb->buffers.list);
> list_add_tail(&bb->list, &blkh.list);
> +    init_list_head(&bb->mapped_list);
> 
> bh = malloc(sizeof(struct erofs_buffer_head));
> if (!bh) {
> @@ -214,6 +261,18 @@ found:
> return bh;
> }
> 
> +static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
> +{
> +    struct list_head *bkt;
> +
> +    if (bb->blkaddr == NULL_ADDR)
> +        return;
> +
> +    bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
> +    list_del(&bb->mapped_list);
> +    list_add_tail(&bb->mapped_list, bkt);
> +}
> +
> struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
>               int type, unsigned int size)
> {
> @@ -239,6 +298,7 @@ struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
>   free(nbh);
>   return ERR_PTR(ret);
> }
> +    erofs_bupdate_mapped(bb);

Shouldn’t this line go into “__erofs_battach()”? Since “erofs_balloc()” only calls that function.
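
Something like this, hypothetically (only the tail of __erofs_battach()
is sketched, assuming its last parameter is a dry-run flag, as the call
sites in this patch suggest):

	/* at the end of __erofs_battach(), once the attach has succeeded */
	if (!dry_run)
		erofs_bupdate_mapped(bb);	/* keep the buckets in sync */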

> return nbh;
> 
> }
> @@ -247,8 +307,11 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
> {
> erofs_blk_t blkaddr;
> 
> -    if (bb->blkaddr == NULL_ADDR)
> +    if (bb->blkaddr == NULL_ADDR) {
>   bb->blkaddr = tail_blkaddr;
> +        last_mapped_block = bb;
> +        erofs_bupdate_mapped(bb);
> +    }
> 
> blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
> if (blkaddr > tail_blkaddr)
> @@ -259,15 +322,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
> 
> erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb)
> {
> -    struct erofs_buffer_block *t, *nt;
> +    struct erofs_buffer_block *t = last_mapped_block;
> 
> -    if (!bb || bb->blkaddr == NULL_ADDR) {
> -        list_for_each_entry_safe(t, nt, &blkh.list, list) {
> -            (void)__erofs_mapbh(t);
> -            if (t == bb)
> -                break;
> -        }
> -    }
> +    do {
> +        t = list_next_entry(t, list);
> +        if (t == &blkh)
> +            break;
> +
> +        DBG_BUGON(t->blkaddr != NULL_ADDR);
> +        (void)__erofs_mapbh(t);
> +    } while (t != bb);
> return tail_blkaddr;
> }
> 
> @@ -309,6 +373,7 @@ bool erofs_bflush(struct erofs_buffer_block *bb)
> 
>   erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
> 
> +        list_del(&p->mapped_list);
>   list_del(&p->list);
>   free(p);
> }
> @@ -332,6 +397,10 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
> if (!list_empty(&bb->buffers.list))
>   return;
> 
> +    if (bb == last_mapped_block)
> +        last_mapped_block = list_prev_entry(bb, list);
> +
> +    list_del(&bb->mapped_list);
> list_del(&bb->list);
> free(bb);
> 
> -- 
> 2.24.0



* Re: [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic
  2021-01-18 12:29 [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic 胡玮文
@ 2021-01-18 13:54 ` Gao Xiang
  0 siblings, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2021-01-18 13:54 UTC (permalink / raw)
  To: 胡玮文; +Cc: linux-erofs

Hi Weiwen,

On Mon, Jan 18, 2021 at 08:29:23PM +0800, 胡玮文 wrote:
> Hi Xiang,
> 
> Thank you, your patch is indeed clearer. Could you explain why you don’t like my added erofs_dbg calls? I find them very useful when debugging.

I do like your erofs_dbg, but we might need to shift (e.g. rename to
erofs_verbose? not sure...) some messages like

erofs_dbg("Writing %u uncompressed data to block %u",

to another dbglevel (since such messages might be helpful for end
users... not only developers...)
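
Maybe something along these lines (a rough sketch only; the
erofs_verbose name and the EROFS_VERBOSE level are made up here, and
I'm assuming cfg.c_dbg_lvl remains the single verbosity knob):

	/* hypothetical level between the default output and full debugging */
	#define EROFS_VERBOSE	5

	#define erofs_verbose(fmt, ...) do {				\
		if (cfg.c_dbg_lvl >= EROFS_VERBOSE)			\
			fprintf(stderr, fmt "\n", ##__VA_ARGS__);	\
	} while (0)

	/* then: erofs_verbose("Writing %u uncompressed data to block %u", ...); */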

Also, we might need to polish the English wording... Honestly,
I'm not good at this either.

> 
> I’ve updated the commit message with some fixes (see inline) for PATCH v5 (coming soon).

okay.

> 
> > On Jan 16, 2021, at 14:41, Gao Xiang <hsiangkao@aol.com> wrote:
> > 
> > From: Hu Weiwen <sehuww@mail.scut.edu.cn>
> > 
> > When using EROFS to pack our dataset, which consists of millions of
> > files, mkfs.erofs is very slow compared with mksquashfs.
> > 
> > The bottlenecks are the `erofs_balloc` and `erofs_mapbh` functions,
> > which iterate over all previously allocated buffer blocks, making the
> > complexity of the algorithm O(N^2), where N is the number of files.
> > 
> > With this patch:
> > 
> > * a global `last_mapped_block` is maintained to avoid a full scan in
> > the `erofs_mapbh` function.
> > 
> > * a global `non_full_buffer_blocks` maintains a list of buffer blocks
> > for each type and each possible number of remaining bytes in the
> > block. It is then used to find the most suitable block in future
> > `erofs_balloc` calls, avoiding a full scan.
> > 
> > Some new data structures are allocated in this patch, so more RAM
> > usage is expected, but not much. When I tested it with the ImageNet
> > dataset (1.33M files), 7GiB of RAM was consumed and it took about
> > 4 hours. Most of the time was spent on IO.
> > 
> > Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> > Signed-off-by: Gao Xiang <hsiangkao@aol.com>
> > ---
> > 
> > I've simplified the most-fit finding logic of v3... Since buffers.off
> > has to be aligned to alignsize, I think it's better to use buffers.off
> > as the index of mapped_buckets rather than the remaining size, as it
> > looks more straightforward.
> > 
> > Also, I found the existing logic for handling expanded blocks might be
> > potentially ineffective as well... we have to skip used < used0 only
> > after oob (extra blocks are allocated, so don't expand such blocks but
> > allocate a new bb...). It might be more effective to reuse such
> > non-mapped buffer blocks...
> > 
> > Thanks,
> > Gao Xiang
> > 
> > include/erofs/cache.h |  1 +
> > lib/cache.c           | 91 +++++++++++++++++++++++++++++++++++++------
> > 2 files changed, 81 insertions(+), 11 deletions(-)
> > 
> > diff --git a/include/erofs/cache.h b/include/erofs/cache.h
> > index f8dff67b9736..611ca5b8432b 100644
> > --- a/include/erofs/cache.h
> > +++ b/include/erofs/cache.h
> > @@ -39,6 +39,7 @@ struct erofs_buffer_head {
> > 
> > struct erofs_buffer_block {
> > struct list_head list;
> > +    struct list_head mapped_list;
> > 
> > erofs_blk_t blkaddr;
> > int type;
> > diff --git a/lib/cache.c b/lib/cache.c
> > index 32a58311f563..a44e140bc77b 100644
> > --- a/lib/cache.c
> > +++ b/lib/cache.c
> > @@ -18,6 +18,11 @@ static struct erofs_buffer_block blkh = {
> > };
> > static erofs_blk_t tail_blkaddr;
> > 
> > +/* buckets for all mapped buffer blocks to boost up allocation */
> > +static struct list_head mapped_buckets[2][EROFS_BLKSIZ];
> > +/* last mapped buffer block to accelerate erofs_mapbh() */
> > +static struct erofs_buffer_block *last_mapped_block = &blkh;
> > +
> > static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
> > {
> > return erofs_bh_flush_generic_end(bh);
> > @@ -62,12 +67,17 @@ struct erofs_bhops erofs_buf_write_bhops = {
> > /* return buffer_head of erofs super block (with size 0) */
> > struct erofs_buffer_head *erofs_buffer_init(void)
> > {
> > +    int i, j;
> > struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
> > 
> > if (IS_ERR(bh))
> >   return bh;
> > 
> > bh->op = &erofs_skip_write_bhops;
> > +
> > +    for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
> > +        for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
> > +            init_list_head(&mapped_buckets[i][j]);
> > return bh;
> > }
> > 
> > @@ -132,20 +142,55 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> > struct erofs_buffer_block *cur, *bb;
> > struct erofs_buffer_head *bh;
> > unsigned int alignsize, used0, usedmax;
> > +    unsigned int used_before, used;
> > 
> > int ret = get_alignsize(type, &type);
> > 
> > if (ret < 0)
> >   return ERR_PTR(ret);
> > +
> > +    DBG_BUGON(type < 0 || type > META);
> > alignsize = ret;
> > 
> > used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
> > usedmax = 0;
> > bb = NULL;
> > 
> > -    list_for_each_entry(cur, &blkh.list, list) {
> > -        unsigned int used_before, used;
> > +    if (!used0 || alignsize == EROFS_BLKSIZ)
> > +        goto alloc;
> > +
> > +    /* try to find a most-fit mapped buffer block first */
> > +    for (used_before = EROFS_BLKSIZ; used_before > 1; ) {
> 
> I would argue that it would be more efficient to use a tighter initial value for used_before.

I'll take a look at your next version later; I think I'm out of
steam for now...

> 
> > +        struct list_head *bt = mapped_buckets[type] + --used_before;
> > +
> > +        if (list_empty(bt))
> > +            continue;
> > +        cur = list_first_entry(bt, struct erofs_buffer_block,
> > +                       mapped_list);
> > +
> > +        /* last mapped block can be expanded, don't handle it here */
> > +        if (cur == last_mapped_block)
> > +            continue;
> > +
> > +        ret = __erofs_battach(cur, NULL, size, alignsize,
> > +                      required_ext + inline_ext, true);
> > +        if (ret < 0)
> 
> If used_before is chosen properly, this should never fail.

Yeah, but my idea is that we shouldn't rely on the internal
implementation logic of __erofs_battach, in case that logic changes
someday. Otherwise, we would suffer from it later...

> 
> > +            continue;
> > +
> > +        /* should contain all data in the current block */
> > +        used = ret + required_ext + inline_ext;
> > +        DBG_BUGON(used > EROFS_BLKSIZ);
> > +
> > +        bb = cur;
> > +        usedmax = used;
> > +        break;
> > +    }
> > 
> > +    /* try to start from the last mapped one, which can be expanded */
> > +    cur = last_mapped_block;
> > +    if (cur == &blkh)
> > +        cur = list_next_entry(cur, list);
> > +    for (; cur != &blkh; cur = list_next_entry(cur, list)) {
> >   used_before = cur->buffers.off % EROFS_BLKSIZ;
> > 
> >   /* skip if buffer block is just full */
> > @@ -187,6 +232,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> >   goto found;
> > }
> > 
> > +alloc:
> > /* allocate a new buffer block */
> > if (used0 > EROFS_BLKSIZ)
> >   return ERR_PTR(-ENOSPC);
> > @@ -200,6 +246,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
> > bb->buffers.off = 0;
> > init_list_head(&bb->buffers.list);
> > list_add_tail(&bb->list, &blkh.list);
> > +    init_list_head(&bb->mapped_list);
> > 
> > bh = malloc(sizeof(struct erofs_buffer_head));
> > if (!bh) {
> > @@ -214,6 +261,18 @@ found:
> > return bh;
> > }
> > 
> > +static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
> > +{
> > +    struct list_head *bkt;
> > +
> > +    if (bb->blkaddr == NULL_ADDR)
> > +        return;
> > +
> > +    bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
> > +    list_del(&bb->mapped_list);
> > +    list_add_tail(&bb->mapped_list, bkt);
> > +}
> > +
> > struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
> >               int type, unsigned int size)
> > {
> > @@ -239,6 +298,7 @@ struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
> >   free(nbh);
> >   return ERR_PTR(ret);
> > }
> > +    erofs_bupdate_mapped(bb);
> 
> Shouldn’t this line go into “__erofs_battach()”? Since “erofs_balloc()” only calls that function.

(personally...) I'd like to keep this line here, since __erofs_battach
also covers the dry_run case, and it mainly focuses on calculation and
on handling the bh rather than the bb...

Thanks,
Gao Xiang



* Re: [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic
  2021-01-18 13:02   ` 胡玮文
@ 2021-01-18 14:06     ` Gao Xiang
  0 siblings, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2021-01-18 14:06 UTC (permalink / raw)
  To: 胡玮文; +Cc: linux-erofs

Hi Weiwen,

On Mon, Jan 18, 2021 at 09:02:16PM +0800, 胡玮文 wrote:
> 
> > On Jan 16, 2021, at 14:41, Gao Xiang <hsiangkao@aol.com> wrote:
> > 
> > From: Hu Weiwen <sehuww@mail.scut.edu.cn>
> > 
> > When using EROFS to pack our dataset, which consists of millions of
> > files, mkfs.erofs is very slow compared with mksquashfs.
> > 
> > The bottlenecks are the `erofs_balloc` and `erofs_mapbh` functions,
> > which iterate over all previously allocated buffer blocks, making the
> > complexity of the algorithm O(N^2), where N is the number of files.
> > 
> > With this patch:
> > 
> > * a global `last_mapped_block` is maintained to avoid a full scan in
> >  the `erofs_mapbh` function.
> > 
> > * a global `non_full_buffer_blocks` maintains a list of buffer blocks
> >  for each type and each possible number of remaining bytes in the
> >  block. It is then used to find the most suitable block in future
> >  `erofs_balloc` calls, avoiding a full scan.
> > 
> > Some new data structures are allocated in this patch, so more RAM
> > usage is expected, but not much. When I tested it with the ImageNet
> > dataset (1.33M files), 7GiB of RAM was consumed and it took about
> > 4 hours. Most of the time was spent on IO.
> > 
> > Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> > Signed-off-by: Gao Xiang <hsiangkao@aol.com>
> > ---
> > 
> > I've simplified the most-fit finding logic of v3... Since buffers.off
> > has to be aligned to alignsize, I think it's better to use buffers.off
> > as the index of mapped_buckets rather than the remaining size, as it
> > looks more straightforward.
> > 
> > Also, I found the existing logic for handling expanded blocks might be
> > potentially ineffective as well... we have to skip used < used0 only
> > after oob (extra blocks are allocated, so don't expand such blocks but
> > allocate a new bb...). It might be more effective to reuse such
> > non-mapped buffer blocks...
> 
> I don’t get this. If not oob, then “used” should be larger than “used_before”, so we will not skip such a block anyway, right?

Sorry, I think I was completely misled by the comment above the
code at that time. It's been too long to recall the original
intention :( I think you're right (anyway, I think the main purpose
of this condition was that I didn't want to introduce too many new
bbs with a lot of unused space. But such a condition is only
approximate anyway, since packing buffers optimally is really a
0-1 knapsack problem and best-fit is just a greedy heuristic).
Please ignore my previous words about this...

Thanks,
Gao Xiang



* Re: [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic
  2021-01-16  6:31 ` [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic Gao Xiang via Linux-erofs
@ 2021-01-18 13:02   ` 胡玮文
  2021-01-18 14:06     ` Gao Xiang
  0 siblings, 1 reply; 5+ messages in thread
From: 胡玮文 @ 2021-01-18 13:02 UTC (permalink / raw)
  To: Gao Xiang; +Cc: linux-erofs


> On Jan 16, 2021, at 14:41, Gao Xiang <hsiangkao@aol.com> wrote:
> 
> From: Hu Weiwen <sehuww@mail.scut.edu.cn>
> 
> When using EROFS to pack our dataset, which consists of millions of
> files, mkfs.erofs is very slow compared with mksquashfs.
> 
> The bottlenecks are the `erofs_balloc` and `erofs_mapbh` functions,
> which iterate over all previously allocated buffer blocks, making the
> complexity of the algorithm O(N^2), where N is the number of files.
> 
> With this patch:
> 
> * a global `last_mapped_block` is maintained to avoid a full scan in
>  the `erofs_mapbh` function.
> 
> * a global `non_full_buffer_blocks` maintains a list of buffer blocks
>  for each type and each possible number of remaining bytes in the
>  block. It is then used to find the most suitable block in future
>  `erofs_balloc` calls, avoiding a full scan.
> 
> Some new data structures are allocated in this patch, so more RAM
> usage is expected, but not much. When I tested it with the ImageNet
> dataset (1.33M files), 7GiB of RAM was consumed and it took about
> 4 hours. Most of the time was spent on IO.
> 
> Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
> Signed-off-by: Gao Xiang <hsiangkao@aol.com>
> ---
> 
> I've simplified the most-fit finding logic of v3... Since buffers.off
> has to be aligned to alignsize, I think it's better to use buffers.off
> as the index of mapped_buckets rather than the remaining size, as it
> looks more straightforward.
> 
> Also, I found the existing logic for handling expanded blocks might be
> potentially ineffective as well... we have to skip used < used0 only
> after oob (extra blocks are allocated, so don't expand such blocks but
> allocate a new bb...). It might be more effective to reuse such
> non-mapped buffer blocks...

I don’t get this. If not oob, then “used” should be larger than “used_before”, so we will not skip such a block anyway, right?

> Thanks,
> Gao Xiang
> 
> include/erofs/cache.h |  1 +
> lib/cache.c           | 91 +++++++++++++++++++++++++++++++++++++------
> 2 files changed, 81 insertions(+), 11 deletions(-)
> 
> diff --git a/include/erofs/cache.h b/include/erofs/cache.h
> index f8dff67b9736..611ca5b8432b 100644
> --- a/include/erofs/cache.h
> +++ b/include/erofs/cache.h
> @@ -39,6 +39,7 @@ struct erofs_buffer_head {
> 
> struct erofs_buffer_block {
>    struct list_head list;
> +    struct list_head mapped_list;
> 
>    erofs_blk_t blkaddr;
>    int type;
> diff --git a/lib/cache.c b/lib/cache.c
> index 32a58311f563..a44e140bc77b 100644
> --- a/lib/cache.c
> +++ b/lib/cache.c
> @@ -18,6 +18,11 @@ static struct erofs_buffer_block blkh = {
> };
> static erofs_blk_t tail_blkaddr;
> 
> +/* buckets for all mapped buffer blocks to boost up allocation */
> +static struct list_head mapped_buckets[2][EROFS_BLKSIZ];
> +/* last mapped buffer block to accelerate erofs_mapbh() */
> +static struct erofs_buffer_block *last_mapped_block = &blkh;
> +
> static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
> {
>    return erofs_bh_flush_generic_end(bh);
> @@ -62,12 +67,17 @@ struct erofs_bhops erofs_buf_write_bhops = {
> /* return buffer_head of erofs super block (with size 0) */
> struct erofs_buffer_head *erofs_buffer_init(void)
> {
> +    int i, j;
>    struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
> 
>    if (IS_ERR(bh))
>        return bh;
> 
>    bh->op = &erofs_skip_write_bhops;
> +
> +    for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
> +        for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
> +            init_list_head(&mapped_buckets[i][j]);
>    return bh;
> }
> 
> @@ -132,20 +142,55 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>    struct erofs_buffer_block *cur, *bb;
>    struct erofs_buffer_head *bh;
>    unsigned int alignsize, used0, usedmax;
> +    unsigned int used_before, used;
> 
>    int ret = get_alignsize(type, &type);
> 
>    if (ret < 0)
>        return ERR_PTR(ret);
> +
> +    DBG_BUGON(type < 0 || type > META);
>    alignsize = ret;
> 
>    used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
>    usedmax = 0;
>    bb = NULL;
> 
> -    list_for_each_entry(cur, &blkh.list, list) {
> -        unsigned int used_before, used;
> +    if (!used0 || alignsize == EROFS_BLKSIZ)
> +        goto alloc;
> +
> +    /* try to find a most-fit mapped buffer block first */
> +    for (used_before = EROFS_BLKSIZ; used_before > 1; ) {
> +        struct list_head *bt = mapped_buckets[type] + --used_before;
> +
> +        if (list_empty(bt))
> +            continue;
> +        cur = list_first_entry(bt, struct erofs_buffer_block,
> +                       mapped_list);
> +
> +        /* last mapped block can be expanded, don't handle it here */
> +        if (cur == last_mapped_block)
> +            continue;
> +
> +        ret = __erofs_battach(cur, NULL, size, alignsize,
> +                      required_ext + inline_ext, true);
> +        if (ret < 0)
> +            continue;
> +
> +        /* should contain all data in the current block */
> +        used = ret + required_ext + inline_ext;
> +        DBG_BUGON(used > EROFS_BLKSIZ);
> +
> +        bb = cur;
> +        usedmax = used;
> +        break;
> +    }
> 
> +    /* try to start from the last mapped one, which can be expanded */
> +    cur = last_mapped_block;
> +    if (cur == &blkh)
> +        cur = list_next_entry(cur, list);
> +    for (; cur != &blkh; cur = list_next_entry(cur, list)) {
>        used_before = cur->buffers.off % EROFS_BLKSIZ;
> 
>        /* skip if buffer block is just full */
> @@ -187,6 +232,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>        goto found;
>    }
> 
> +alloc:
>    /* allocate a new buffer block */
>    if (used0 > EROFS_BLKSIZ)
>        return ERR_PTR(-ENOSPC);
> @@ -200,6 +246,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
>    bb->buffers.off = 0;
>    init_list_head(&bb->buffers.list);
>    list_add_tail(&bb->list, &blkh.list);
> +    init_list_head(&bb->mapped_list);
> 
>    bh = malloc(sizeof(struct erofs_buffer_head));
>    if (!bh) {
> @@ -214,6 +261,18 @@ found:
>    return bh;
> }
> 
> +static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
> +{
> +    struct list_head *bkt;
> +
> +    if (bb->blkaddr == NULL_ADDR)
> +        return;
> +
> +    bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
> +    list_del(&bb->mapped_list);
> +    list_add_tail(&bb->mapped_list, bkt);
> +}
> +
> struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
>                    int type, unsigned int size)
> {
> @@ -239,6 +298,7 @@ struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
>        free(nbh);
>        return ERR_PTR(ret);
>    }
> +    erofs_bupdate_mapped(bb);
>    return nbh;
> 
> }
> @@ -247,8 +307,11 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
> {
>    erofs_blk_t blkaddr;
> 
> -    if (bb->blkaddr == NULL_ADDR)
> +    if (bb->blkaddr == NULL_ADDR) {
>        bb->blkaddr = tail_blkaddr;
> +        last_mapped_block = bb;
> +        erofs_bupdate_mapped(bb);
> +    }
> 
>    blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
>    if (blkaddr > tail_blkaddr)
> @@ -259,15 +322,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
> 
> erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb)
> {
> -    struct erofs_buffer_block *t, *nt;
> +    struct erofs_buffer_block *t = last_mapped_block;
> 
> -    if (!bb || bb->blkaddr == NULL_ADDR) {
> -        list_for_each_entry_safe(t, nt, &blkh.list, list) {
> -            (void)__erofs_mapbh(t);
> -            if (t == bb)
> -                break;
> -        }
> -    }
> +    do {
> +        t = list_next_entry(t, list);
> +        if (t == &blkh)
> +            break;
> +
> +        DBG_BUGON(t->blkaddr != NULL_ADDR);
> +        (void)__erofs_mapbh(t);
> +    } while (t != bb);
>    return tail_blkaddr;
> }
> 
> @@ -309,6 +373,7 @@ bool erofs_bflush(struct erofs_buffer_block *bb)
> 
>        erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
> 
> +        list_del(&p->mapped_list);
>        list_del(&p->list);
>        free(p);
>    }
> @@ -332,6 +397,10 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
>    if (!list_empty(&bb->buffers.list))
>        return;
> 
> +    if (bb == last_mapped_block)
> +        last_mapped_block = list_prev_entry(bb, list);
> +
> +    list_del(&bb->mapped_list);
>    list_del(&bb->list);
>    free(bb);
> 
> -- 
> 2.24.0
> 



* [PATCH v4 2/2] erofs-utils: optimize buffer allocation logic
  2021-01-16  5:04 [PATCH v3 2/2] erofs-utils: optimize mkfs to O(N), N = #files Gao Xiang via Linux-erofs
@ 2021-01-16  6:31 ` Gao Xiang via Linux-erofs
  2021-01-18 13:02   ` 胡玮文
  0 siblings, 1 reply; 5+ messages in thread
From: Gao Xiang via Linux-erofs @ 2021-01-16  6:31 UTC (permalink / raw)
  To: linux-erofs; +Cc: Hu Weiwen

From: Hu Weiwen <sehuww@mail.scut.edu.cn>

When using EROFS to pack our dataset, which consists of millions of
files, mkfs.erofs is very slow compared with mksquashfs.

The bottlenecks are the `erofs_balloc` and `erofs_mapbh` functions,
which iterate over all previously allocated buffer blocks, making the
complexity of the algorithm O(N^2), where N is the number of files.

With this patch:

* a global `last_mapped_block` is maintained to avoid a full scan in
  the `erofs_mapbh` function.

* a global `non_full_buffer_blocks` maintains a list of buffer blocks
  for each type and each possible number of remaining bytes in the
  block. It is then used to find the most suitable block in future
  `erofs_balloc` calls, avoiding a full scan.

Some new data structures are allocated in this patch, so more RAM
usage is expected, but not much. When I tested it with the ImageNet
dataset (1.33M files), 7GiB of RAM was consumed and it took about
4 hours. Most of the time was spent on IO.

Signed-off-by: Hu Weiwen <sehuww@mail.scut.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao@aol.com>
---

I've simplified the most-fit finding logic of v3... Since buffers.off
has to be aligned to alignsize, I think it's better to use buffers.off
as the index of mapped_buckets rather than the remaining size, as it
looks more straightforward.

Also, I found the existing logic for handling expanded blocks might be
potentially ineffective as well... we have to skip used < used0 only
after oob (extra blocks are allocated, so don't expand such blocks but
allocate a new bb...). It might be more effective to reuse such
non-mapped buffer blocks...

Thanks,
Gao Xiang

 include/erofs/cache.h |  1 +
 lib/cache.c           | 91 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/include/erofs/cache.h b/include/erofs/cache.h
index f8dff67b9736..611ca5b8432b 100644
--- a/include/erofs/cache.h
+++ b/include/erofs/cache.h
@@ -39,6 +39,7 @@ struct erofs_buffer_head {
 
 struct erofs_buffer_block {
 	struct list_head list;
+	struct list_head mapped_list;
 
 	erofs_blk_t blkaddr;
 	int type;
diff --git a/lib/cache.c b/lib/cache.c
index 32a58311f563..a44e140bc77b 100644
--- a/lib/cache.c
+++ b/lib/cache.c
@@ -18,6 +18,11 @@ static struct erofs_buffer_block blkh = {
 };
 static erofs_blk_t tail_blkaddr;
 
+/* buckets for all mapped buffer blocks to boost up allocation */
+static struct list_head mapped_buckets[2][EROFS_BLKSIZ];
+/* last mapped buffer block to accelerate erofs_mapbh() */
+static struct erofs_buffer_block *last_mapped_block = &blkh;
+
 static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
 {
 	return erofs_bh_flush_generic_end(bh);
@@ -62,12 +67,17 @@ struct erofs_bhops erofs_buf_write_bhops = {
 /* return buffer_head of erofs super block (with size 0) */
 struct erofs_buffer_head *erofs_buffer_init(void)
 {
+	int i, j;
 	struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
 
 	if (IS_ERR(bh))
 		return bh;
 
 	bh->op = &erofs_skip_write_bhops;
+
+	for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
+		for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
+			init_list_head(&mapped_buckets[i][j]);
 	return bh;
 }
 
@@ -132,20 +142,55 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
 	struct erofs_buffer_block *cur, *bb;
 	struct erofs_buffer_head *bh;
 	unsigned int alignsize, used0, usedmax;
+	unsigned int used_before, used;
 
 	int ret = get_alignsize(type, &type);
 
 	if (ret < 0)
 		return ERR_PTR(ret);
+
+	DBG_BUGON(type < 0 || type > META);
 	alignsize = ret;
 
 	used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
 	usedmax = 0;
 	bb = NULL;
 
-	list_for_each_entry(cur, &blkh.list, list) {
-		unsigned int used_before, used;
+	if (!used0 || alignsize == EROFS_BLKSIZ)
+		goto alloc;
+
+	/* try to find a most-fit mapped buffer block first */
+	for (used_before = EROFS_BLKSIZ; used_before > 1; ) {
+		struct list_head *bt = mapped_buckets[type] + --used_before;
+
+		if (list_empty(bt))
+			continue;
+		cur = list_first_entry(bt, struct erofs_buffer_block,
+				       mapped_list);
+
+		/* last mapped block can be expanded, don't handle it here */
+		if (cur == last_mapped_block)
+			continue;
+
+		ret = __erofs_battach(cur, NULL, size, alignsize,
+				      required_ext + inline_ext, true);
+		if (ret < 0)
+			continue;
+
+		/* should contain all data in the current block */
+		used = ret + required_ext + inline_ext;
+		DBG_BUGON(used > EROFS_BLKSIZ);
+
+		bb = cur;
+		usedmax = used;
+		break;
+	}
 
+	/* try to start from the last mapped one, which can be expanded */
+	cur = last_mapped_block;
+	if (cur == &blkh)
+		cur = list_next_entry(cur, list);
+	for (; cur != &blkh; cur = list_next_entry(cur, list)) {
 		used_before = cur->buffers.off % EROFS_BLKSIZ;
 
 		/* skip if buffer block is just full */
@@ -187,6 +232,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
 		goto found;
 	}
 
+alloc:
 	/* allocate a new buffer block */
 	if (used0 > EROFS_BLKSIZ)
 		return ERR_PTR(-ENOSPC);
@@ -200,6 +246,7 @@ struct erofs_buffer_head *erofs_balloc(int type, erofs_off_t size,
 	bb->buffers.off = 0;
 	init_list_head(&bb->buffers.list);
 	list_add_tail(&bb->list, &blkh.list);
+	init_list_head(&bb->mapped_list);
 
 	bh = malloc(sizeof(struct erofs_buffer_head));
 	if (!bh) {
@@ -214,6 +261,18 @@ found:
 	return bh;
 }
 
+static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
+{
+	struct list_head *bkt;
+
+	if (bb->blkaddr == NULL_ADDR)
+		return;
+
+	bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
+	list_del(&bb->mapped_list);
+	list_add_tail(&bb->mapped_list, bkt);
+}
+
 struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
 					int type, unsigned int size)
 {
@@ -239,6 +298,7 @@ struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
 		free(nbh);
 		return ERR_PTR(ret);
 	}
+	erofs_bupdate_mapped(bb);
 	return nbh;
 
 }
@@ -247,8 +307,11 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
 {
 	erofs_blk_t blkaddr;
 
-	if (bb->blkaddr == NULL_ADDR)
+	if (bb->blkaddr == NULL_ADDR) {
 		bb->blkaddr = tail_blkaddr;
+		last_mapped_block = bb;
+		erofs_bupdate_mapped(bb);
+	}
 
 	blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
 	if (blkaddr > tail_blkaddr)
@@ -259,15 +322,16 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block *bb)
 
 erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb)
 {
-	struct erofs_buffer_block *t, *nt;
+	struct erofs_buffer_block *t = last_mapped_block;
 
-	if (!bb || bb->blkaddr == NULL_ADDR) {
-		list_for_each_entry_safe(t, nt, &blkh.list, list) {
-			(void)__erofs_mapbh(t);
-			if (t == bb)
-				break;
-		}
-	}
+	do {
+		t = list_next_entry(t, list);
+		if (t == &blkh)
+			break;
+
+		DBG_BUGON(t->blkaddr != NULL_ADDR);
+		(void)__erofs_mapbh(t);
+	} while (t != bb);
 	return tail_blkaddr;
 }
 
@@ -309,6 +373,7 @@ bool erofs_bflush(struct erofs_buffer_block *bb)
 
 		erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
 
+		list_del(&p->mapped_list);
 		list_del(&p->list);
 		free(p);
 	}
@@ -332,6 +397,10 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke)
 	if (!list_empty(&bb->buffers.list))
 		return;
 
+	if (bb == last_mapped_block)
+		last_mapped_block = list_prev_entry(bb, list);
+
+	list_del(&bb->mapped_list);
 	list_del(&bb->list);
 	free(bb);
 
-- 
2.24.0



