* [PATCH v4] erofs-utils: introduce segment compression @ 2020-06-18 16:26 Li Guifu via Linux-erofs 2020-06-18 23:05 ` Gao Xiang 2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs 0 siblings, 2 replies; 11+ messages in thread From: Li Guifu via Linux-erofs @ 2020-06-18 16:26 UTC (permalink / raw) To: linux-erofs; +Cc: Li Guifu Support segment compression which seperates files in several logic units (segments) and each segment is compressed independently. Advantages: - more friendly for data differencing; - it can also be used for parallel compression in the same file later. Signed-off-by: Li Guifu <bluce.lee@aliyun.com> --- Changes since v3 suggest by Gao Xiang<hsiangkao@gmx.com>: - add 'S#' parameter to custome compression segment size - move limit logic to size decrease include/erofs/config.h | 1 + lib/compress.c | 8 ++++++-- lib/config.c | 1 + mkfs/main.c | 16 +++++++++++++++- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index 2f09749..9125c1e 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -36,6 +36,7 @@ struct erofs_configure { char *c_src_path; char *c_compr_alg_master; int c_compr_level_master; + unsigned int c_compr_seg_size; /* max segment compress size */ int c_force_inodeversion; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ int c_inline_xattr_tolerance; diff --git a/lib/compress.c b/lib/compress.c index 6cc68ed..eb024aa 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { erofs_blk_t blkaddr; /* pointing to the next blkaddr */ u16 clusterofs; + unsigned int comprlimits; }; #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ @@ -163,8 +164,7 @@ static int vle_compress_one(struct erofs_inode *inode, goto nocompression; break; } - - count = len; + count = min(len, ctx->comprlimits); ret = erofs_compress_destsize(h, compressionlevel, ctx->queue + ctx->head, &count, dst, EROFS_BLKSIZ); @@ -202,6 +202,9 @@ 
nocompression: ++ctx->blkaddr; len -= count; + ctx->comprlimits -= count; + if (!ctx->comprlimits) + ctx->comprlimits = cfg.c_compr_seg_size; if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = @@ -422,6 +425,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) ctx.head = ctx.tail = 0; ctx.clusterofs = 0; remaining = inode->i_size; + ctx.comprlimits = cfg.c_compr_seg_size; while (remaining) { const u64 readcount = min_t(u64, remaining, diff --git a/lib/config.c b/lib/config.c index da0c260..1c39403 100644 --- a/lib/config.c +++ b/lib/config.c @@ -23,6 +23,7 @@ void erofs_init_configure(void) cfg.c_force_inodeversion = 0; cfg.c_inline_xattr_tolerance = 2; cfg.c_unix_timestamp = -1; + cfg.c_compr_seg_size = 1024U * EROFS_BLKSIZ; } void erofs_show_config(void) diff --git a/mkfs/main.c b/mkfs/main.c index 94bf1e6..036d818 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -61,6 +61,7 @@ static void usage(void) " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -EX[,...] 
X=extended options\n" " -T# set a fixed UNIX timestamp # to all files\n" + " -S# set the max input stream size # to one compress\n" " --exclude-path=X avoid including file X (X = exact literal path)\n" " --exclude-regex=X avoid including files that match X (X = regular expression)\n" #ifdef HAVE_LIBSELINUX @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) char *endptr; int opt, i; - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", long_options, NULL)) != -1) { switch (opt) { case 'z': @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 'S': + cfg.c_compr_seg_size = strtol(optarg, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid compress segment size %s", + optarg); + return -EINVAL; + } + if (cfg.c_compr_seg_size % EROFS_BLKSIZ != 0) { + erofs_err("segment size:%u should be align to %u", + cfg.c_compr_seg_size, EROFS_BLKSIZ); + return -EINVAL; + } + break; case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v4] erofs-utils: introduce segment compression 2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs @ 2020-06-18 23:05 ` Gao Xiang 2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs 1 sibling, 0 replies; 11+ messages in thread From: Gao Xiang @ 2020-06-18 23:05 UTC (permalink / raw) To: Li Guifu; +Cc: linux-erofs Hi Guifu, On Fri, Jun 19, 2020 at 12:26:57AM +0800, Li Guifu via Linux-erofs wrote: > Support segment compression which seperates files in several logic > units (segments) and each segment is compressed independently. > > Advantages: > - more friendly for data differencing; > - it can also be used for parallel compression in the same file later. > > Signed-off-by: Li Guifu <bluce.lee@aliyun.com> > --- > Changes since v3 suggest by Gao Xiang<hsiangkao@gmx.com>: > - add 'S#' parameter to custome compression segment size > - move limit logic to size decrease > > include/erofs/config.h | 1 + > lib/compress.c | 8 ++++++-- > lib/config.c | 1 + > mkfs/main.c | 16 +++++++++++++++- This is just a quick response; I will test the patch later. First, you might need to update the manpage as well.
> 4 files changed, 23 insertions(+), 3 deletions(-) > > diff --git a/include/erofs/config.h b/include/erofs/config.h > index 2f09749..9125c1e 100644 > --- a/include/erofs/config.h > +++ b/include/erofs/config.h > @@ -36,6 +36,7 @@ struct erofs_configure { > char *c_src_path; > char *c_compr_alg_master; > int c_compr_level_master; u64 c_compr_segsize; > + unsigned int c_compr_seg_size; /* max segment compress size */ > int c_force_inodeversion; > /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ > int c_inline_xattr_tolerance; > diff --git a/lib/compress.c b/lib/compress.c > index 6cc68ed..eb024aa 100644 > --- a/lib/compress.c > +++ b/lib/compress.c > @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { > > erofs_blk_t blkaddr; /* pointing to the next blkaddr */ > u16 clusterofs; > + unsigned int comprlimits; How about the name "segavail"; ? u64 segavail; > }; > > #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ > @@ -163,8 +164,7 @@ static int vle_compress_one(struct erofs_inode *inode, > goto nocompression; > break; > } I think we might add "if (segavail < EROFS_BLKSIZE) goto nocompression;" since it seems better. 
> - > - count = len; > + count = min(len, ctx->comprlimits); > ret = erofs_compress_destsize(h, compressionlevel, > ctx->queue + ctx->head, > &count, dst, EROFS_BLKSIZ); > @@ -202,6 +202,9 @@ nocompression: > > ++ctx->blkaddr; > len -= count; > + ctx->comprlimits -= count; > + if (!ctx->comprlimits) > + ctx->comprlimits = cfg.c_compr_seg_size; > > if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { > const unsigned int qh_aligned = > @@ -422,6 +425,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) > ctx.head = ctx.tail = 0; > ctx.clusterofs = 0; > remaining = inode->i_size; > + ctx.comprlimits = cfg.c_compr_seg_size; > > while (remaining) { > const u64 readcount = min_t(u64, remaining, > diff --git a/lib/config.c b/lib/config.c > index da0c260..1c39403 100644 > --- a/lib/config.c > +++ b/lib/config.c > @@ -23,6 +23,7 @@ void erofs_init_configure(void) > cfg.c_force_inodeversion = 0; > cfg.c_inline_xattr_tolerance = 2; > cfg.c_unix_timestamp = -1; > + cfg.c_compr_seg_size = 1024U * EROFS_BLKSIZ; We don't need that limit by default, so cfg.c_compr_segsize = -1ULL; > } > > void erofs_show_config(void) > diff --git a/mkfs/main.c b/mkfs/main.c > index 94bf1e6..036d818 100644 > --- a/mkfs/main.c > +++ b/mkfs/main.c > @@ -61,6 +61,7 @@ static void usage(void) > " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" > " -EX[,...] 
X=extended options\n" > " -T# set a fixed UNIX timestamp # to all files\n" > + " -S# set the max input stream size # to one compress\n" > " --exclude-path=X avoid including file X (X = exact literal path)\n" > " --exclude-regex=X avoid including files that match X (X = regular expression)\n" > #ifdef HAVE_LIBSELINUX > @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > char *endptr; > int opt, i; > > - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", > + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", > long_options, NULL)) != -1) { > switch (opt) { > case 'z': > @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > return -EINVAL; > } > break; > + case 'S': > + cfg.c_compr_seg_size = strtol(optarg, &endptr, 0); > + if (*endptr != '\0') { > + erofs_err("invalid compress segment size %s", > + optarg); > + return -EINVAL; > + } > + if (cfg.c_compr_seg_size % EROFS_BLKSIZ != 0) { > + erofs_err("segment size:%u should be align to %u", > + cfg.c_compr_seg_size, EROFS_BLKSIZ); > + return -EINVAL; > + } if (!cfg.c_compr_segsize) cfg.c_compr_segsize = -1ULL; else if (cfg.c_compr_segsize % EROFS_BLKSIZ) { erofs_err("segmentsize %u should be aligned with blocksize %u", cfg.c_compr_seg_size, EROFS_BLKSIZ); return -EINVAL; } Thanks, Gao Xiang ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v5] erofs-utils: introduce segment compression 2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs 2020-06-18 23:05 ` Gao Xiang @ 2020-06-19 17:51 ` Li Guifu via Linux-erofs 2020-06-19 23:03 ` Gao Xiang 2020-06-21 10:51 ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs 1 sibling, 2 replies; 11+ messages in thread From: Li Guifu via Linux-erofs @ 2020-06-19 17:51 UTC (permalink / raw) To: linux-erofs; +Cc: Li Guifu Support segment compression which seperates files in several logic units (segments) and each segment is compressed independently. Advantages: - more friendly for data differencing; - it can also be used for parallel compression in the same file later. Signed-off-by: Li Guifu <bluce.lee@aliyun.com> --- include/erofs/config.h | 1 + lib/compress.c | 16 ++++++++++++++-- lib/config.c | 1 + man/mkfs.erofs.1 | 4 ++++ mkfs/main.c | 16 +++++++++++++++- 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index 2f09749..995664d 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -36,6 +36,7 @@ struct erofs_configure { char *c_src_path; char *c_compr_alg_master; int c_compr_level_master; + u64 c_compr_seg_size; int c_force_inodeversion; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ int c_inline_xattr_tolerance; diff --git a/lib/compress.c b/lib/compress.c index 6cc68ed..383ee00 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { erofs_blk_t blkaddr; /* pointing to the next blkaddr */ u16 clusterofs; + u64 segavail; }; #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ @@ -158,13 +159,19 @@ static int vle_compress_one(struct erofs_inode *inode, while (len) { bool raw; + count = min_t(u64, len, ctx->segavail); + if (ctx->segavail <= EROFS_BLKSIZ) { + if (len < ctx->segavail && !final) + break; + goto nocompression; + } + if (len <= EROFS_BLKSIZ) { if 
(final) goto nocompression; break; } - count = len; ret = erofs_compress_destsize(h, compressionlevel, ctx->queue + ctx->head, &count, dst, EROFS_BLKSIZ); @@ -174,8 +181,9 @@ static int vle_compress_one(struct erofs_inode *inode, inode->i_srcpath, erofs_strerror(ret)); } + count = len; nocompression: - ret = write_uncompressed_block(ctx, &len, dst); + ret = write_uncompressed_block(ctx, &count, dst); if (ret < 0) return ret; count = ret; @@ -202,6 +210,9 @@ nocompression: ++ctx->blkaddr; len -= count; + ctx->segavail -= count; + if (!ctx->segavail) + ctx->segavail = cfg.c_compr_seg_size; if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = @@ -422,6 +433,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) ctx.head = ctx.tail = 0; ctx.clusterofs = 0; remaining = inode->i_size; + ctx.segavail = cfg.c_compr_seg_size; while (remaining) { const u64 readcount = min_t(u64, remaining, diff --git a/lib/config.c b/lib/config.c index da0c260..de982e1 100644 --- a/lib/config.c +++ b/lib/config.c @@ -23,6 +23,7 @@ void erofs_init_configure(void) cfg.c_force_inodeversion = 0; cfg.c_inline_xattr_tolerance = 2; cfg.c_unix_timestamp = -1; + cfg.c_compr_seg_size = UINT64_MAX; } void erofs_show_config(void) diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 891c5a8..b12cb22 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output. Set all files to the given UNIX timestamp. Reproducible builds requires setting all to a specific one. .TP +.BI "\-S " # +Set the max input stream size at one compression. The default is unsigned 64bit MAX. +It must be algin to EROFS block size(4096). +.TP .BI "\-\-exclude-path=" path Ignore file that matches the exact literal path. You may give multiple `--exclude-path' options. 
diff --git a/mkfs/main.c b/mkfs/main.c index 94bf1e6..96cc053 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -61,6 +61,7 @@ static void usage(void) " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -EX[,...] X=extended options\n" " -T# set a fixed UNIX timestamp # to all files\n" + " -S# set the max input stream size # at one compression\n" " --exclude-path=X avoid including file X (X = exact literal path)\n" " --exclude-regex=X avoid including files that match X (X = regular expression)\n" #ifdef HAVE_LIBSELINUX @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) char *endptr; int opt, i; - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", long_options, NULL)) != -1) { switch (opt) { case 'z': @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 'S': + cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0); + if (*endptr != '\0' || !cfg.c_compr_seg_size) { + erofs_err("invalid compress segment size %s", + optarg); + return -EINVAL; + } + if (cfg.c_compr_seg_size % EROFS_BLKSIZ) { + erofs_err("segment size:%"PRIu64" should be align to %u", + cfg.c_compr_seg_size, EROFS_BLKSIZ); + return -EINVAL; + } + break; case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v5] erofs-utils: introduce segment compression 2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs @ 2020-06-19 23:03 ` Gao Xiang 2020-06-21 10:51 ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs 1 sibling, 0 replies; 11+ messages in thread From: Gao Xiang @ 2020-06-19 23:03 UTC (permalink / raw) To: Li Guifu; +Cc: linux-erofs On Sat, Jun 20, 2020 at 01:51:33AM +0800, Li Guifu via Linux-erofs wrote: > Support segment compression which seperates files in several logic > units (segments) and each segment is compressed independently. > > Advantages: > - more friendly for data differencing; > - it can also be used for parallel compression in the same file later. > > Signed-off-by: Li Guifu <bluce.lee@aliyun.com> > --- > include/erofs/config.h | 1 + > lib/compress.c | 16 ++++++++++++++-- > lib/config.c | 1 + > man/mkfs.erofs.1 | 4 ++++ > mkfs/main.c | 16 +++++++++++++++- > 5 files changed, 35 insertions(+), 3 deletions(-) > > diff --git a/include/erofs/config.h b/include/erofs/config.h > index 2f09749..995664d 100644 > --- a/include/erofs/config.h > +++ b/include/erofs/config.h > @@ -36,6 +36,7 @@ struct erofs_configure { > char *c_src_path; > char *c_compr_alg_master; > int c_compr_level_master; > + u64 c_compr_seg_size; Could you please move this variable up a bit? Thanks. 
char *c_compr_alg_master; u64 c_compr_seg_size; int c_compr_level_master; > int c_force_inodeversion; > /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ > int c_inline_xattr_tolerance; > diff --git a/lib/compress.c b/lib/compress.c > index 6cc68ed..383ee00 100644 > --- a/lib/compress.c > +++ b/lib/compress.c > @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { > > erofs_blk_t blkaddr; /* pointing to the next blkaddr */ > u16 clusterofs; > + u64 segavail; > }; > > #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ > @@ -158,13 +159,19 @@ static int vle_compress_one(struct erofs_inode *inode, > while (len) { > bool raw; unsigned int limit; > > + count = min_t(u64, len, ctx->segavail); kill this line. > + if (ctx->segavail <= EROFS_BLKSIZ) { > + if (len < ctx->segavail && !final) > + break; limit = ctx->segavail; > + goto nocompression; > + } > + > if (len <= EROFS_BLKSIZ) { > if (final) > goto nocompression; > break; > } > > - count = len; count = min_t(u64, len, ctx->segavail); > ret = erofs_compress_destsize(h, compressionlevel, > ctx->queue + ctx->head, > &count, dst, EROFS_BLKSIZ); > @@ -174,8 +181,9 @@ static int vle_compress_one(struct erofs_inode *inode, > inode->i_srcpath, > erofs_strerror(ret)); > } > + count = len; kill this line and add limit = EROFS_BLKSIZ; > nocompression: > - ret = write_uncompressed_block(ctx, &len, dst); > + ret = write_uncompressed_block(ctx, &count, dst); ret = write_uncompressed_block(ctx, &count, limit, dst); and update write_uncompressed_block as well. 
> if (ret < 0) > return ret; > count = ret; > @@ -202,6 +210,9 @@ nocompression: > > ++ctx->blkaddr; > len -= count; > + ctx->segavail -= count; > + if (!ctx->segavail) > + ctx->segavail = cfg.c_compr_seg_size; > > if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { > const unsigned int qh_aligned = > @@ -422,6 +433,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) > ctx.head = ctx.tail = 0; > ctx.clusterofs = 0; > remaining = inode->i_size; > + ctx.segavail = cfg.c_compr_seg_size; > > while (remaining) { > const u64 readcount = min_t(u64, remaining, > diff --git a/lib/config.c b/lib/config.c > index da0c260..de982e1 100644 > --- a/lib/config.c > +++ b/lib/config.c > @@ -23,6 +23,7 @@ void erofs_init_configure(void) > cfg.c_force_inodeversion = 0; > cfg.c_inline_xattr_tolerance = 2; > cfg.c_unix_timestamp = -1; > + cfg.c_compr_seg_size = UINT64_MAX; cfg.c_compr_seg_size = -1; since it is a very simple way to assign UINT_MAX by implicit sign extension without taking care for the specific data type. > } > > void erofs_show_config(void) > diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 > index 891c5a8..b12cb22 100644 > --- a/man/mkfs.erofs.1 > +++ b/man/mkfs.erofs.1 > @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output. > Set all files to the given UNIX timestamp. Reproducible builds requires setting > all to a specific one. > .TP > +.BI "\-S " # > +Set the max input stream size at one compression. The default is unsigned 64bit MAX. > +It must be algin to EROFS block size(4096). it's hard for end users to type "max 64-bit unsigned value"... I'd suggest "Set max input stream size for each individual segment (disabled if 0). The default value is 0. It should be aligned with blocksize." > +.TP > .BI "\-\-exclude-path=" path > Ignore file that matches the exact literal path. > You may give multiple `--exclude-path' options. 
> diff --git a/mkfs/main.c b/mkfs/main.c > index 94bf1e6..96cc053 100644 > --- a/mkfs/main.c > +++ b/mkfs/main.c > @@ -61,6 +61,7 @@ static void usage(void) > " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" > " -EX[,...] X=extended options\n" > " -T# set a fixed UNIX timestamp # to all files\n" > + " -S# set the max input stream size # at one compression\n" -S# Set max input stream size # for each individual segment\n > " --exclude-path=X avoid including file X (X = exact literal path)\n" > " --exclude-regex=X avoid including files that match X (X = regular expression)\n" > #ifdef HAVE_LIBSELINUX > @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > char *endptr; > int opt, i; > > - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", > + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", > long_options, NULL)) != -1) { > switch (opt) { > case 'z': > @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > return -EINVAL; > } > break; > + case 'S': > + cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0); > + if (*endptr != '\0' || !cfg.c_compr_seg_size) { Disable this if cfg.c_compr_seg_size == 0 > + erofs_err("invalid compress segment size %s", > + optarg); > + return -EINVAL; > + } > + if (cfg.c_compr_seg_size % EROFS_BLKSIZ) { > + erofs_err("segment size:%"PRIu64" should be align to %u", Could you follow my advice in the previous reply? Although I'm not good at English, but I don't think the above message is _reasonable_. Thanks, Gao Xiang > + cfg.c_compr_seg_size, EROFS_BLKSIZ); > + return -EINVAL; > + } > + break; > case 2: > opt = erofs_parse_exclude_path(optarg, false); > if (opt) { > -- > 2.17.1 > ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v6] erofs-utils: introduce segment limits compression 2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs 2020-06-19 23:03 ` Gao Xiang @ 2020-06-21 10:51 ` Li Guifu via Linux-erofs 2020-06-21 12:27 ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs 1 sibling, 1 reply; 11+ messages in thread From: Li Guifu via Linux-erofs @ 2020-06-21 10:51 UTC (permalink / raw) To: linux-erofs; +Cc: Li Guifu Support segment compression which seperates files in several logic units (segments) and each segment is compressed independently. Advantages: - more friendly for data differencing; - it can also be used for parallel compression in the same file later. Signed-off-by: Li Guifu <bluce.lee@aliyun.com> --- Changes since v3 suggest by Gao Xiang<hsiangkao@gmx.com>: - add a limits varialbe to give the limits size in the write_uncompress_block - Set comments more readable include/erofs/config.h | 1 + lib/compress.c | 29 +++++++++++++++++++++-------- lib/config.c | 1 + man/mkfs.erofs.1 | 4 ++++ mkfs/main.c | 18 +++++++++++++++++- 5 files changed, 44 insertions(+), 9 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index 2f09749..e5f1bfb 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -35,6 +35,7 @@ struct erofs_configure { char *c_img_path; char *c_src_path; char *c_compr_alg_master; + u64 c_compr_seg_size; int c_compr_level_master; int c_force_inodeversion; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ diff --git a/lib/compress.c b/lib/compress.c index 6cc68ed..8a79895 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { erofs_blk_t blkaddr; /* pointing to the next blkaddr */ u16 clusterofs; + u64 segavail; }; #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ @@ -116,23 +117,21 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx, } static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx, - unsigned int *len, 
+ unsigned int *len, unsigned int limits, char *dst) { int ret; - unsigned int count; + unsigned int count = min(limits, *len); /* write uncompressed data */ /* reset clusterofs to 0 if permitted */ if (!erofs_sb_has_lz4_0padding() && ctx->head >= ctx->clusterofs) { ctx->head -= ctx->clusterofs; *len += ctx->clusterofs; + count += ctx->clusterofs; ctx->clusterofs = 0; } - /* write uncompressed data */ - count = min(EROFS_BLKSIZ, *len); - memcpy(dst, ctx->queue + ctx->head, count); memset(dst + count, 0, EROFS_BLKSIZ - count); @@ -157,14 +156,22 @@ static int vle_compress_one(struct erofs_inode *inode, while (len) { bool raw; + unsigned int limits = EROFS_BLKSIZ; + + if (ctx->segavail <= EROFS_BLKSIZ) { + if (len < ctx->segavail && !final) + break; + + limits = ctx->segavail; + goto nocompression; + } if (len <= EROFS_BLKSIZ) { if (final) goto nocompression; break; } - - count = len; + count = min_t(u64, len, ctx->segavail); ret = erofs_compress_destsize(h, compressionlevel, ctx->queue + ctx->head, &count, dst, EROFS_BLKSIZ); @@ -175,7 +182,7 @@ static int vle_compress_one(struct erofs_inode *inode, erofs_strerror(ret)); } nocompression: - ret = write_uncompressed_block(ctx, &len, dst); + ret = write_uncompressed_block(ctx, &len, limits, dst); if (ret < 0) return ret; count = ret; @@ -203,6 +210,11 @@ nocompression: ++ctx->blkaddr; len -= count; + if (count >= ctx->segavail) + ctx->segavail = cfg.c_compr_seg_size; + else + ctx->segavail -= count; + if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = round_down(ctx->head, EROFS_BLKSIZ); @@ -422,6 +434,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) ctx.head = ctx.tail = 0; ctx.clusterofs = 0; remaining = inode->i_size; + ctx.segavail = cfg.c_compr_seg_size; while (remaining) { const u64 readcount = min_t(u64, remaining, diff --git a/lib/config.c b/lib/config.c index da0c260..fbb2914 100644 --- a/lib/config.c +++ b/lib/config.c @@ -23,6 +23,7 @@ void 
erofs_init_configure(void) cfg.c_force_inodeversion = 0; cfg.c_inline_xattr_tolerance = 2; cfg.c_unix_timestamp = -1; + cfg.c_compr_seg_size = -1UL; } void erofs_show_config(void) diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 891c5a8..0b613e4 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output. Set all files to the given UNIX timestamp. Reproducible builds requires setting all to a specific one. .TP +.BI "\-S " # +Set max input stream size for each individual segment (disabled if 0). +The default value is 0. It should be aligned with blocksize. +.TP .BI "\-\-exclude-path=" path Ignore file that matches the exact literal path. You may give multiple `--exclude-path' options. diff --git a/mkfs/main.c b/mkfs/main.c index 94bf1e6..0e26f4f 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -61,6 +61,7 @@ static void usage(void) " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -EX[,...] X=extended options\n" " -T# set a fixed UNIX timestamp # to all files\n" + " -S# Set max input stream size # for each individual segment\n" " --exclude-path=X avoid including file X (X = exact literal path)\n" " --exclude-regex=X avoid including files that match X (X = regular expression)\n" #ifdef HAVE_LIBSELINUX @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) char *endptr; int opt, i; - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", long_options, NULL)) != -1) { switch (opt) { case 'z': @@ -188,6 +189,21 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 'S': + cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid compress segment size %s", + optarg); + return -EINVAL; + } + if (!cfg.c_compr_seg_size) { + cfg.c_compr_seg_size = -1UL; + } else if (cfg.c_compr_seg_size % EROFS_BLKSIZ) { + erofs_err("segment 
size:%"PRIu64" should be align to %u", + cfg.c_compr_seg_size, EROFS_BLKSIZ); + return -EINVAL; + } + break; case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v8] erofs-utils: introduce segment compression 2020-06-21 10:51 ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs @ 2020-06-21 12:27 ` Li Guifu via Linux-erofs 2020-06-21 14:37 ` Gao Xiang via Linux-erofs 0 siblings, 1 reply; 11+ messages in thread From: Li Guifu via Linux-erofs @ 2020-06-21 12:27 UTC (permalink / raw) To: linux-erofs; +Cc: Li Guifu Support segment compression which seperates files in several logic units (segments) and each segment is compressed independently. Advantages: - more friendly for data differencing; - it can also be used for parallel compression in the same file later. Signed-off-by: Li Guifu <bluce.lee@aliyun.com> --- include/erofs/config.h | 1 + lib/compress.c | 47 ++++++++++++++++++++++++++---------------- lib/config.c | 1 + man/mkfs.erofs.1 | 4 ++++ mkfs/main.c | 18 +++++++++++++++- 5 files changed, 52 insertions(+), 19 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index 2f09749..e5f1bfb 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -35,6 +35,7 @@ struct erofs_configure { char *c_img_path; char *c_src_path; char *c_compr_alg_master; + u64 c_compr_seg_size; int c_compr_level_master; int c_force_inodeversion; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ diff --git a/lib/compress.c b/lib/compress.c index 6cc68ed..6c0708d 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { erofs_blk_t blkaddr; /* pointing to the next blkaddr */ u16 clusterofs; + u64 segavail; }; #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ @@ -116,23 +117,11 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx, } static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx, - unsigned int *len, - char *dst) + unsigned int count, char *dst) { int ret; - unsigned int count; - - /* reset clusterofs to 0 if permitted */ - if (!erofs_sb_has_lz4_0padding() && - ctx->head >= ctx->clusterofs) 
{ - ctx->head -= ctx->clusterofs; - *len += ctx->clusterofs; - ctx->clusterofs = 0; - } - - /* write uncompressed data */ - count = min(EROFS_BLKSIZ, *len); + DBG_BUGON(count > EROFS_BLKSIZ); memcpy(dst, ctx->queue + ctx->head, count); memset(dst + count, 0, EROFS_BLKSIZ - count); @@ -157,14 +146,22 @@ static int vle_compress_one(struct erofs_inode *inode, while (len) { bool raw; + unsigned int limit = EROFS_BLKSIZ; + + if (ctx->segavail <= EROFS_BLKSIZ) { + if (len < ctx->segavail && !final) + break; + + limit = ctx->segavail; + goto nocompression; + } if (len <= EROFS_BLKSIZ) { if (final) goto nocompression; break; } - - count = len; + count = min_t(u64, len, ctx->segavail); ret = erofs_compress_destsize(h, compressionlevel, ctx->queue + ctx->head, &count, dst, EROFS_BLKSIZ); @@ -175,10 +172,18 @@ static int vle_compress_one(struct erofs_inode *inode, erofs_strerror(ret)); } nocompression: - ret = write_uncompressed_block(ctx, &len, dst); + /* reset clusterofs to 0 if permitted */ + if (!erofs_sb_has_lz4_0padding() && + ctx->head >= ctx->clusterofs) { + ctx->head -= ctx->clusterofs; + len += ctx->clusterofs; + limit += ctx->clusterofs; + ctx->clusterofs = 0; + } + count = min(limit, len); + ret = write_uncompressed_block(ctx, count, dst); if (ret < 0) return ret; - count = ret; raw = true; } else { /* write compressed data */ @@ -203,6 +208,11 @@ nocompression: ++ctx->blkaddr; len -= count; + if (count >= ctx->segavail) + ctx->segavail = cfg.c_compr_seg_size; + else + ctx->segavail -= count; + if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = round_down(ctx->head, EROFS_BLKSIZ); @@ -422,6 +432,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) ctx.head = ctx.tail = 0; ctx.clusterofs = 0; remaining = inode->i_size; + ctx.segavail = cfg.c_compr_seg_size; while (remaining) { const u64 readcount = min_t(u64, remaining, diff --git a/lib/config.c b/lib/config.c index da0c260..721ff61 100644 --- a/lib/config.c +++ 
b/lib/config.c @@ -23,6 +23,7 @@ void erofs_init_configure(void) cfg.c_force_inodeversion = 0; cfg.c_inline_xattr_tolerance = 2; cfg.c_unix_timestamp = -1; + cfg.c_compr_seg_size = -1; } void erofs_show_config(void) diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 891c5a8..0b613e4 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output. Set all files to the given UNIX timestamp. Reproducible builds requires setting all to a specific one. .TP +.BI "\-S " # +Set max input stream size for each individual segment (disabled if 0). +The default value is 0. It should be aligned with blocksize. +.TP .BI "\-\-exclude-path=" path Ignore file that matches the exact literal path. You may give multiple `--exclude-path' options. diff --git a/mkfs/main.c b/mkfs/main.c index 94bf1e6..dcb01cc 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -61,6 +61,7 @@ static void usage(void) " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -EX[,...] 
X=extended options\n" " -T# set a fixed UNIX timestamp # to all files\n" + " -S# Set max input stream size # for each individual segment\n" " --exclude-path=X avoid including file X (X = exact literal path)\n" " --exclude-regex=X avoid including files that match X (X = regular expression)\n" #ifdef HAVE_LIBSELINUX @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) char *endptr; int opt, i; - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", long_options, NULL)) != -1) { switch (opt) { case 'z': @@ -188,6 +189,21 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 'S': + cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid compress segment size %s", + optarg); + return -EINVAL; + } + if (!cfg.c_compr_seg_size) { + cfg.c_compr_seg_size = -1; + } else if (cfg.c_compr_seg_size % EROFS_BLKSIZ) { + erofs_err("segment size:%"PRIu64" should be align with %u", + cfg.c_compr_seg_size, EROFS_BLKSIZ); + return -EINVAL; + } + break; case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v8] erofs-utils: introduce segment compression 2020-06-21 12:27 ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs @ 2020-06-21 14:37 ` Gao Xiang via Linux-erofs 2020-06-30 17:27 ` [PATCH v10] " Li Guifu via Linux-erofs 0 siblings, 1 reply; 11+ messages in thread From: Gao Xiang via Linux-erofs @ 2020-06-21 14:37 UTC (permalink / raw) To: Li Guifu; +Cc: linux-erofs On Sun, Jun 21, 2020 at 08:27:45PM +0800, Li Guifu via Linux-erofs wrote: ... > nocompression: > - ret = write_uncompressed_block(ctx, &len, dst); > + /* reset clusterofs to 0 if permitted */ > + if (!erofs_sb_has_lz4_0padding() && > + ctx->head >= ctx->clusterofs) { > + ctx->head -= ctx->clusterofs; > + len += ctx->clusterofs; > + limit += ctx->clusterofs; > + ctx->clusterofs = 0; TL;DR: it still seems buggy here. Please rethink this carefully and then send a usable patch... Thanks, Gao Xiang ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v10] erofs-utils: introduce segment compression 2020-06-21 14:37 ` Gao Xiang via Linux-erofs @ 2020-06-30 17:27 ` Li Guifu via Linux-erofs 2020-07-05 8:32 ` [PATCH v11] " Li Guifu via Linux-erofs 0 siblings, 1 reply; 11+ messages in thread From: Li Guifu via Linux-erofs @ 2020-06-30 17:27 UTC (permalink / raw) To: linux-erofs; +Cc: Li Guifu Support segment compression which seperates files in several logic units (segments) and each segment is compressed independently. Advantages: - more friendly for data differencing; - it can also be used for parallel compression in the same file later. Signed-off-by: Li Guifu <bluce.lee@aliyun.com> --- include/erofs/config.h | 1 + lib/compress.c | 29 +++++++++++++++++++++++------ lib/config.c | 1 + man/mkfs.erofs.1 | 4 ++++ mkfs/main.c | 16 +++++++++++++++- 5 files changed, 44 insertions(+), 7 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index 2f09749..e5f1bfb 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -35,6 +35,7 @@ struct erofs_configure { char *c_img_path; char *c_src_path; char *c_compr_alg_master; + u64 c_compr_seg_size; int c_compr_level_master; int c_force_inodeversion; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ diff --git a/lib/compress.c b/lib/compress.c index 6cc68ed..2ea5809 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { erofs_blk_t blkaddr; /* pointing to the next blkaddr */ u16 clusterofs; + u64 segavail; }; #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ @@ -116,7 +117,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx, } static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx, - unsigned int *len, + unsigned int *len, unsigned int *ucomproft, char *dst) { int ret; @@ -125,14 +126,19 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx, /* reset clusterofs to 0 if permitted */ if (!erofs_sb_has_lz4_0padding() && ctx->head >= 
ctx->clusterofs) { + *ucomproft = ctx->clusterofs; ctx->head -= ctx->clusterofs; *len += ctx->clusterofs; ctx->clusterofs = 0; + count = min(EROFS_BLKSIZ, *len); + } else { + *ucomproft = 0; + count = min_t(u64, ctx->segavail, *len); + if (count > EROFS_BLKSIZ) + count = EROFS_BLKSIZ; } /* write uncompressed data */ - count = min(EROFS_BLKSIZ, *len); - memcpy(dst, ctx->queue + ctx->head, count); memset(dst + count, 0, EROFS_BLKSIZ - count); @@ -157,14 +163,21 @@ static int vle_compress_one(struct erofs_inode *inode, while (len) { bool raw; + unsigned int ucomproft = 0; + + if (ctx->segavail <= EROFS_BLKSIZ) { + if (len < ctx->segavail && !final) + break; + + goto nocompression; + } if (len <= EROFS_BLKSIZ) { if (final) goto nocompression; break; } - - count = len; + count = min_t(u64, len, ctx->segavail); ret = erofs_compress_destsize(h, compressionlevel, ctx->queue + ctx->head, &count, dst, EROFS_BLKSIZ); @@ -175,7 +188,7 @@ static int vle_compress_one(struct erofs_inode *inode, erofs_strerror(ret)); } nocompression: - ret = write_uncompressed_block(ctx, &len, dst); + ret = write_uncompressed_block(ctx, &len, &ucomproft, dst); if (ret < 0) return ret; count = ret; @@ -202,6 +215,9 @@ nocompression: ++ctx->blkaddr; len -= count; + ctx->segavail -= count - ucomproft; + if (!ctx->segavail) + ctx->segavail = cfg.c_compr_seg_size; if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = @@ -422,6 +438,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) ctx.head = ctx.tail = 0; ctx.clusterofs = 0; remaining = inode->i_size; + ctx.segavail = cfg.c_compr_seg_size; while (remaining) { const u64 readcount = min_t(u64, remaining, diff --git a/lib/config.c b/lib/config.c index da0c260..721ff61 100644 --- a/lib/config.c +++ b/lib/config.c @@ -23,6 +23,7 @@ void erofs_init_configure(void) cfg.c_force_inodeversion = 0; cfg.c_inline_xattr_tolerance = 2; cfg.c_unix_timestamp = -1; + cfg.c_compr_seg_size = -1; } void 
erofs_show_config(void) diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 891c5a8..2a4ef71 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output. Set all files to the given UNIX timestamp. Reproducible builds requires setting all to a specific one. .TP +.BI "\-S " # +Set max input stream size for each individual segment (disabled if 0). +The default value is 0. It has be aligned with blocksize. +.TP .BI "\-\-exclude-path=" path Ignore file that matches the exact literal path. You may give multiple `--exclude-path' options. diff --git a/mkfs/main.c b/mkfs/main.c index 94bf1e6..cded973 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -61,6 +61,7 @@ static void usage(void) " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -EX[,...] X=extended options\n" " -T# set a fixed UNIX timestamp # to all files\n" + " -S# Set max input stream size # for each individual segment\n" " --exclude-path=X avoid including file X (X = exact literal path)\n" " --exclude-regex=X avoid including files that match X (X = regular expression)\n" #ifdef HAVE_LIBSELINUX @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) char *endptr; int opt, i; - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", long_options, NULL)) != -1) { switch (opt) { case 'z': @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 'S': + i = strtoll(optarg, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid compress segment size %s", + optarg); + return -EINVAL; + } + if (!i) { + cfg.c_compr_seg_size = -1; + } else { + cfg.c_compr_seg_size = i * EROFS_BLKSIZ; + } + break; case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v11] erofs-utils: introduce segment compression 2020-06-30 17:27 ` [PATCH v10] " Li Guifu via Linux-erofs @ 2020-07-05 8:32 ` Li Guifu via Linux-erofs 2020-07-05 18:20 ` Gao Xiang via Linux-erofs 0 siblings, 1 reply; 11+ messages in thread From: Li Guifu via Linux-erofs @ 2020-07-05 8:32 UTC (permalink / raw) To: linux-erofs; +Cc: Li Guifu Support segment compression which seperates files in several logic units (segments) and each segment is compressed independently. Advantages: - more friendly for data differencing; - it can also be used for parallel compression in the same file later. Signed-off-by: Li Guifu <bluce.lee@aliyun.com> --- Changes from v10 - chang variable uncomprofs to clusterofs which only used when write uncompress block include/erofs/config.h | 1 + lib/compress.c | 30 ++++++++++++++++++++++++------ lib/config.c | 1 + man/mkfs.erofs.1 | 4 ++++ mkfs/main.c | 16 +++++++++++++++- 5 files changed, 45 insertions(+), 7 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index 2f09749..e5f1bfb 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -35,6 +35,7 @@ struct erofs_configure { char *c_img_path; char *c_src_path; char *c_compr_alg_master; + u64 c_compr_seg_size; int c_compr_level_master; int c_force_inodeversion; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ diff --git a/lib/compress.c b/lib/compress.c index 6cc68ed..a2a278c 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { erofs_blk_t blkaddr; /* pointing to the next blkaddr */ u16 clusterofs; + u64 segavail; }; #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ @@ -116,7 +117,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx, } static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx, - unsigned int *len, + unsigned int *len, unsigned int *clusterofs, char *dst) { int ret; @@ -125,14 +126,19 @@ static int write_uncompressed_block(struct 
z_erofs_vle_compress_ctx *ctx, /* reset clusterofs to 0 if permitted */ if (!erofs_sb_has_lz4_0padding() && ctx->head >= ctx->clusterofs) { + *clusterofs = ctx->clusterofs; ctx->head -= ctx->clusterofs; *len += ctx->clusterofs; ctx->clusterofs = 0; + count = min(EROFS_BLKSIZ, *len); + } else { + *clusterofs = 0; + count = min_t(u64, ctx->segavail, *len); + if (count > EROFS_BLKSIZ) + count = EROFS_BLKSIZ; } /* write uncompressed data */ - count = min(EROFS_BLKSIZ, *len); - memcpy(dst, ctx->queue + ctx->head, count); memset(dst + count, 0, EROFS_BLKSIZ - count); @@ -157,14 +163,21 @@ static int vle_compress_one(struct erofs_inode *inode, while (len) { bool raw; + unsigned int clusterofs; + + if (ctx->segavail <= EROFS_BLKSIZ) { + if (len < ctx->segavail && !final) + break; + + goto nocompression; + } if (len <= EROFS_BLKSIZ) { if (final) goto nocompression; break; } - - count = len; + count = min_t(u64, len, ctx->segavail); ret = erofs_compress_destsize(h, compressionlevel, ctx->queue + ctx->head, &count, dst, EROFS_BLKSIZ); @@ -175,11 +188,12 @@ static int vle_compress_one(struct erofs_inode *inode, erofs_strerror(ret)); } nocompression: - ret = write_uncompressed_block(ctx, &len, dst); + ret = write_uncompressed_block(ctx, &len, &clusterofs, dst); if (ret < 0) return ret; count = ret; raw = true; + ctx->segavail -= count - clusterofs; } else { /* write compressed data */ erofs_dbg("Writing %u compressed data to block %u", @@ -194,6 +208,7 @@ nocompression: if (ret) return ret; raw = false; + ctx->segavail -= count; } ctx->head += count; @@ -202,6 +217,8 @@ nocompression: ++ctx->blkaddr; len -= count; + if (!ctx->segavail) + ctx->segavail = cfg.c_compr_seg_size; if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { const unsigned int qh_aligned = @@ -422,6 +439,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) ctx.head = ctx.tail = 0; ctx.clusterofs = 0; remaining = inode->i_size; + ctx.segavail = cfg.c_compr_seg_size; while (remaining) { const u64 
readcount = min_t(u64, remaining, diff --git a/lib/config.c b/lib/config.c index da0c260..721ff61 100644 --- a/lib/config.c +++ b/lib/config.c @@ -23,6 +23,7 @@ void erofs_init_configure(void) cfg.c_force_inodeversion = 0; cfg.c_inline_xattr_tolerance = 2; cfg.c_unix_timestamp = -1; + cfg.c_compr_seg_size = -1; } void erofs_show_config(void) diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 891c5a8..2a4ef71 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output. Set all files to the given UNIX timestamp. Reproducible builds requires setting all to a specific one. .TP +.BI "\-S " # +Set max input stream size for each individual segment (disabled if 0). +The default value is 0. It has be aligned with blocksize. +.TP .BI "\-\-exclude-path=" path Ignore file that matches the exact literal path. You may give multiple `--exclude-path' options. diff --git a/mkfs/main.c b/mkfs/main.c index 94bf1e6..cded973 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -61,6 +61,7 @@ static void usage(void) " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -EX[,...] 
X=extended options\n" " -T# set a fixed UNIX timestamp # to all files\n" + " -S# Set max input stream size # for each individual segment\n" " --exclude-path=X avoid including file X (X = exact literal path)\n" " --exclude-regex=X avoid including files that match X (X = regular expression)\n" #ifdef HAVE_LIBSELINUX @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) char *endptr; int opt, i; - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", long_options, NULL)) != -1) { switch (opt) { case 'z': @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 'S': + i = strtoll(optarg, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid compress segment size %s", + optarg); + return -EINVAL; + } + if (!i) { + cfg.c_compr_seg_size = -1; + } else { + cfg.c_compr_seg_size = i * EROFS_BLKSIZ; + } + break; case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v11] erofs-utils: introduce segment compression 2020-07-05 8:32 ` [PATCH v11] " Li Guifu via Linux-erofs @ 2020-07-05 18:20 ` Gao Xiang via Linux-erofs 2020-10-07 15:04 ` Gao Xiang via Linux-erofs 0 siblings, 1 reply; 11+ messages in thread From: Gao Xiang via Linux-erofs @ 2020-07-05 18:20 UTC (permalink / raw) To: Li Guifu, Li Guifu; +Cc: linux-erofs Hi Guifu, On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote: > Support segment compression which seperates files in several logic > units (segments) and each segment is compressed independently. > > Advantages: > - more friendly for data differencing; > - it can also be used for parallel compression in the same file later. > > Signed-off-by: Li Guifu <bluce.lee@aliyun.com> > --- > Changes from v10 > - chang variable uncomprofs to clusterofs which only used > when write uncompress block Could you please test the following patch if you're available? Does it work? From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001 From: Li Guifu <bluce.lee@aliyun.com> Date: Sun, 5 Jul 2020 16:32:30 +0800 Subject: [PATCH v12] erofs-utils: introduce segment compression Support segment compression which seperates files in several logic units (segments) and each segment is compressed independently. Advantages: - more friendly for data differencing; - it can also be used for parallel compression in the same file later. 
Signed-off-by: Li Guifu <bluce.lee@aliyun.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> --- include/erofs/config.h | 2 ++ lib/compress.c | 38 ++++++++++++++++++++++++++++++-------- lib/config.c | 1 + man/mkfs.erofs.1 | 4 ++++ mkfs/main.c | 12 +++++++++++- 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index 2f09749..b149633 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -35,6 +35,8 @@ struct erofs_configure { char *c_img_path; char *c_src_path; char *c_compr_alg_master; + u64 c_compr_segsize; + int c_compr_level_master; int c_force_inodeversion; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ diff --git a/lib/compress.c b/lib/compress.c index 6cc68ed..4216fa7 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { erofs_blk_t blkaddr; /* pointing to the next blkaddr */ u16 clusterofs; + u64 segavail; }; #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ @@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx, /* reset clusterofs to 0 if permitted */ if (!erofs_sb_has_lz4_0padding() && - ctx->head >= ctx->clusterofs) { + ctx->clusterofs && ctx->head >= ctx->clusterofs) { ctx->head -= ctx->clusterofs; *len += ctx->clusterofs; + + ctx->segavail += ctx->clusterofs; + DBG_BUGON(ctx->segavail > cfg.c_compr_segsize); + + DBG_BUGON(ctx->segavail < EROFS_BLKSIZ); + /* so only *len will be the candidate instead of segavail */ + count = *len; + ctx->clusterofs = 0; + } else { + count = min_t(u64, ctx->segavail, *len); } - /* write uncompressed data */ - count = min(EROFS_BLKSIZ, *len); + if (count > EROFS_BLKSIZ) + count = EROFS_BLKSIZ; + /* fill zero if the uncompressed block isn't full */ memcpy(dst, ctx->queue + ctx->head, count); memset(dst + count, 0, EROFS_BLKSIZ - count); erofs_dbg("Writing %u uncompressed data to block %u", count, ctx->blkaddr); ret = blk_write(dst, ctx->blkaddr, 1); - if 
(ret) - return ret; - return count; + return ret ? ret : count; } static int vle_compress_one(struct erofs_inode *inode, @@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode, while (len) { bool raw; + if (ctx->segavail <= EROFS_BLKSIZ) { + if (len < ctx->segavail && !final) + break; + + goto nocompression; + } + if (len <= EROFS_BLKSIZ) { if (final) goto nocompression; break; } - count = len; + count = min_t(u64, len, ctx->segavail); ret = erofs_compress_destsize(h, compressionlevel, ctx->queue + ctx->head, &count, dst, EROFS_BLKSIZ); @@ -195,8 +212,12 @@ nocompression: return ret; raw = false; } - ctx->head += count; + DBG_BUGON(ctx->segavail < count); + ctx->segavail -= count; + if (!ctx->segavail) + ctx->segavail = cfg.c_compr_segsize; + /* write compression indexes for this blkaddr */ vle_write_indexes(ctx, count, raw); @@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE; ctx.head = ctx.tail = 0; ctx.clusterofs = 0; + ctx.segavail = cfg.c_compr_segsize; remaining = inode->i_size; while (remaining) { diff --git a/lib/config.c b/lib/config.c index da0c260..9d4bea1 100644 --- a/lib/config.c +++ b/lib/config.c @@ -23,6 +23,7 @@ void erofs_init_configure(void) cfg.c_force_inodeversion = 0; cfg.c_inline_xattr_tolerance = 2; cfg.c_unix_timestamp = -1; + cfg.c_compr_segsize = -1; } void erofs_show_config(void) diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 891c5a8..8d0fc10 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output. Forcely generate extended inodes (64-byte inodes) to output. .RE .TP +.BI "\-S " # +Set maximum blocks for each individual compress segment. +The default is 0 (disabled). +.TP .BI "\-T " # Set all files to the given UNIX timestamp. Reproducible builds requires setting all to a specific one. 
diff --git a/mkfs/main.c b/mkfs/main.c index 94bf1e6..0265ae9 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -61,6 +61,7 @@ static void usage(void) " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" " -EX[,...] X=extended options\n" " -T# set a fixed UNIX timestamp # to all files\n" + " -S# Set maximum blocks for each individual compress segment\n" " --exclude-path=X avoid including file X (X = exact literal path)\n" " --exclude-regex=X avoid including files that match X (X = regular expression)\n" #ifdef HAVE_LIBSELINUX @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) char *endptr; int opt, i; - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", long_options, NULL)) != -1) { switch (opt) { case 'z': @@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 'S': + i = strtoll(optarg, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid blocks per compress segment %s", + optarg); + return -EINVAL; + } + cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1; + break; case 2: opt = erofs_parse_exclude_path(optarg, false); if (opt) { -- 2.24.0 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v11] erofs-utils: introduce segment compression 2020-07-05 18:20 ` Gao Xiang via Linux-erofs @ 2020-10-07 15:04 ` Gao Xiang via Linux-erofs 0 siblings, 0 replies; 11+ messages in thread From: Gao Xiang via Linux-erofs @ 2020-10-07 15:04 UTC (permalink / raw) To: Li Guifu, Li Guifu, linux-erofs Guifu, some progress on this as well? On Mon, Jul 06, 2020 at 02:20:50AM +0800, Gao Xiang via Linux-erofs wrote: > Hi Guifu, > > On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote: > > Support segment compression which seperates files in several logic > > units (segments) and each segment is compressed independently. > > > > Advantages: > > - more friendly for data differencing; > > - it can also be used for parallel compression in the same file later. > > > > Signed-off-by: Li Guifu <bluce.lee@aliyun.com> > > --- > > Changes from v10 > > - chang variable uncomprofs to clusterofs which only used > > when write uncompress block > > Could you please test the following patch if you're available? > Does it work? > > From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001 > From: Li Guifu <bluce.lee@aliyun.com> > Date: Sun, 5 Jul 2020 16:32:30 +0800 > Subject: [PATCH v12] erofs-utils: introduce segment compression > > Support segment compression which seperates files in several logic > units (segments) and each segment is compressed independently. > > Advantages: > - more friendly for data differencing; > - it can also be used for parallel compression in the same file later. 
> > Signed-off-by: Li Guifu <bluce.lee@aliyun.com> > Signed-off-by: Gao Xiang <hsiangkao@redhat.com> > --- > include/erofs/config.h | 2 ++ > lib/compress.c | 38 ++++++++++++++++++++++++++++++-------- > lib/config.c | 1 + > man/mkfs.erofs.1 | 4 ++++ > mkfs/main.c | 12 +++++++++++- > 5 files changed, 48 insertions(+), 9 deletions(-) > > diff --git a/include/erofs/config.h b/include/erofs/config.h > index 2f09749..b149633 100644 > --- a/include/erofs/config.h > +++ b/include/erofs/config.h > @@ -35,6 +35,8 @@ struct erofs_configure { > char *c_img_path; > char *c_src_path; > char *c_compr_alg_master; > + u64 c_compr_segsize; > + > int c_compr_level_master; > int c_force_inodeversion; > /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ > diff --git a/lib/compress.c b/lib/compress.c > index 6cc68ed..4216fa7 100644 > --- a/lib/compress.c > +++ b/lib/compress.c > @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { > > erofs_blk_t blkaddr; /* pointing to the next blkaddr */ > u16 clusterofs; > + u64 segavail; > }; > > #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ > @@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx, > > /* reset clusterofs to 0 if permitted */ > if (!erofs_sb_has_lz4_0padding() && > - ctx->head >= ctx->clusterofs) { > + ctx->clusterofs && ctx->head >= ctx->clusterofs) { > ctx->head -= ctx->clusterofs; > *len += ctx->clusterofs; > + > + ctx->segavail += ctx->clusterofs; > + DBG_BUGON(ctx->segavail > cfg.c_compr_segsize); > + > + DBG_BUGON(ctx->segavail < EROFS_BLKSIZ); > + /* so only *len will be the candidate instead of segavail */ > + count = *len; > + > ctx->clusterofs = 0; > + } else { > + count = min_t(u64, ctx->segavail, *len); > } > > - /* write uncompressed data */ > - count = min(EROFS_BLKSIZ, *len); > + if (count > EROFS_BLKSIZ) > + count = EROFS_BLKSIZ; > > + /* fill zero if the uncompressed block isn't full */ > memcpy(dst, ctx->queue + ctx->head, count); > memset(dst + count, 0, EROFS_BLKSIZ 
- count); > > erofs_dbg("Writing %u uncompressed data to block %u", > count, ctx->blkaddr); > ret = blk_write(dst, ctx->blkaddr, 1); > - if (ret) > - return ret; > - return count; > + return ret ? ret : count; > } > > static int vle_compress_one(struct erofs_inode *inode, > @@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode, > while (len) { > bool raw; > > + if (ctx->segavail <= EROFS_BLKSIZ) { > + if (len < ctx->segavail && !final) > + break; > + > + goto nocompression; > + } > + > if (len <= EROFS_BLKSIZ) { > if (final) > goto nocompression; > break; > } > > - count = len; > + count = min_t(u64, len, ctx->segavail); > ret = erofs_compress_destsize(h, compressionlevel, > ctx->queue + ctx->head, > &count, dst, EROFS_BLKSIZ); > @@ -195,8 +212,12 @@ nocompression: > return ret; > raw = false; > } > - > ctx->head += count; > + DBG_BUGON(ctx->segavail < count); > + ctx->segavail -= count; > + if (!ctx->segavail) > + ctx->segavail = cfg.c_compr_segsize; > + > /* write compression indexes for this blkaddr */ > vle_write_indexes(ctx, count, raw); > > @@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) > ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE; > ctx.head = ctx.tail = 0; > ctx.clusterofs = 0; > + ctx.segavail = cfg.c_compr_segsize; > remaining = inode->i_size; > > while (remaining) { > diff --git a/lib/config.c b/lib/config.c > index da0c260..9d4bea1 100644 > --- a/lib/config.c > +++ b/lib/config.c > @@ -23,6 +23,7 @@ void erofs_init_configure(void) > cfg.c_force_inodeversion = 0; > cfg.c_inline_xattr_tolerance = 2; > cfg.c_unix_timestamp = -1; > + cfg.c_compr_segsize = -1; > } > > void erofs_show_config(void) > diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 > index 891c5a8..8d0fc10 100644 > --- a/man/mkfs.erofs.1 > +++ b/man/mkfs.erofs.1 > @@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output. > Forcely generate extended inodes (64-byte inodes) to output. 
> .RE > .TP > +.BI "\-S " # > +Set maximum blocks for each individual compress segment. > +The default is 0 (disabled). > +.TP > .BI "\-T " # > Set all files to the given UNIX timestamp. Reproducible builds requires setting > all to a specific one. > diff --git a/mkfs/main.c b/mkfs/main.c > index 94bf1e6..0265ae9 100644 > --- a/mkfs/main.c > +++ b/mkfs/main.c > @@ -61,6 +61,7 @@ static void usage(void) > " -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n" > " -EX[,...] X=extended options\n" > " -T# set a fixed UNIX timestamp # to all files\n" > + " -S# Set maximum blocks for each individual compress segment\n" > " --exclude-path=X avoid including file X (X = exact literal path)\n" > " --exclude-regex=X avoid including files that match X (X = regular expression)\n" > #ifdef HAVE_LIBSELINUX > @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > char *endptr; > int opt, i; > > - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", > + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", > long_options, NULL)) != -1) { > switch (opt) { > case 'z': > @@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > return -EINVAL; > } > break; > + case 'S': > + i = strtoll(optarg, &endptr, 0); > + if (*endptr != '\0') { > + erofs_err("invalid blocks per compress segment %s", > + optarg); > + return -EINVAL; > + } > + cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1; > + break; > case 2: > opt = erofs_parse_exclude_path(optarg, false); > if (opt) { > -- > 2.24.0 > > > ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2020-10-07 15:04 UTC | newest] Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs 2020-06-18 23:05 ` Gao Xiang 2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs 2020-06-19 23:03 ` Gao Xiang 2020-06-21 10:51 ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs 2020-06-21 12:27 ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs 2020-06-21 14:37 ` Gao Xiang via Linux-erofs 2020-06-30 17:27 ` [PATCH v10] " Li Guifu via Linux-erofs 2020-07-05 8:32 ` [PATCH v11] " Li Guifu via Linux-erofs 2020-07-05 18:20 ` Gao Xiang via Linux-erofs 2020-10-07 15:04 ` Gao Xiang via Linux-erofs
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.