Linux-EROFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v4] erofs-utils: introduce segment compression
@ 2020-06-18 16:26 Li Guifu via Linux-erofs
  2020-06-18 23:05 ` Gao Xiang
  2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs
  0 siblings, 2 replies; 10+ messages in thread
From: Li Guifu via Linux-erofs @ 2020-06-18 16:26 UTC (permalink / raw)
  To: linux-erofs; +Cc: Li Guifu

Support segment compression, which separates files into several logical
units (segments), each of which is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
---
Changes since v3, as suggested by Gao Xiang <hsiangkao@gmx.com>:
 - add 'S#' parameter to customize the compression segment size
 - move limit logic to size decrease

 include/erofs/config.h |  1 +
 lib/compress.c         |  8 ++++++--
 lib/config.c           |  1 +
 mkfs/main.c            | 16 +++++++++++++++-
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..9125c1e 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -36,6 +36,7 @@ struct erofs_configure {
 	char *c_src_path;
 	char *c_compr_alg_master;
 	int c_compr_level_master;
+	unsigned int c_compr_seg_size;	/* max segment compress size */
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
 	int c_inline_xattr_tolerance;
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..eb024aa 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	unsigned int comprlimits;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -163,8 +164,7 @@ static int vle_compress_one(struct erofs_inode *inode,
 				goto nocompression;
 			break;
 		}
-
-		count = len;
+		count = min(len, ctx->comprlimits);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -202,6 +202,9 @@ nocompression:
 
 		++ctx->blkaddr;
 		len -= count;
+		ctx->comprlimits -= count;
+		if (!ctx->comprlimits)
+			ctx->comprlimits = cfg.c_compr_seg_size;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
@@ -422,6 +425,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
 	remaining = inode->i_size;
+	ctx.comprlimits = cfg.c_compr_seg_size;
 
 	while (remaining) {
 		const u64 readcount = min_t(u64, remaining,
diff --git a/lib/config.c b/lib/config.c
index da0c260..1c39403 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_seg_size = 1024U * EROFS_BLKSIZ;
 }
 
 void erofs_show_config(void)
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..036d818 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               set the max input stream size # to one compress\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			cfg.c_compr_seg_size = strtol(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid compress segment size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			if (cfg.c_compr_seg_size % EROFS_BLKSIZ != 0) {
+				erofs_err("segment size:%u should be align to %u",
+					  cfg.c_compr_seg_size, EROFS_BLKSIZ);
+				return -EINVAL;
+			}
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] erofs-utils: introduce segment compression
  2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
@ 2020-06-18 23:05 ` Gao Xiang
  2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs
  1 sibling, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2020-06-18 23:05 UTC (permalink / raw)
  To: Li Guifu; +Cc: linux-erofs

Hi Guifu,

On Fri, Jun 19, 2020 at 12:26:57AM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression which seperates files in several logic
> units (segments) and each segment is compressed independently.
>
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
>
> Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
> ---
> Changes since v3 suggest by Gao Xiang<hsiangkao@gmx.com>:
>  - add 'S#' parameter to custome compression segment size
>  - move limit logic to size decrease
>
>  include/erofs/config.h |  1 +
>  lib/compress.c         |  8 ++++++--
>  lib/config.c           |  1 +
>  mkfs/main.c            | 16 +++++++++++++++-

Just a quick response for now; I will test it later.

First, you might need to update the manpage as well.

>  4 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 2f09749..9125c1e 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -36,6 +36,7 @@ struct erofs_configure {
>  	char *c_src_path;
>  	char *c_compr_alg_master;
>  	int c_compr_level_master;

u64 c_compr_segsize;

> +	unsigned int c_compr_seg_size;	/* max segment compress size */
>  	int c_force_inodeversion;
>  	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
>  	int c_inline_xattr_tolerance;
> diff --git a/lib/compress.c b/lib/compress.c
> index 6cc68ed..eb024aa 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
>
>  	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
>  	u16 clusterofs;
> +	unsigned int comprlimits;

How about the name "segavail"; ?

u64 segavail;


>  };
>
>  #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
> @@ -163,8 +164,7 @@ static int vle_compress_one(struct erofs_inode *inode,
>  				goto nocompression;
>  			break;
>  		}

I think we might add "if (segavail < EROFS_BLKSIZ) goto nocompression;"
since it seems better.

> -
> -		count = len;
> +		count = min(len, ctx->comprlimits);
>  		ret = erofs_compress_destsize(h, compressionlevel,
>  					      ctx->queue + ctx->head,
>  					      &count, dst, EROFS_BLKSIZ);
> @@ -202,6 +202,9 @@ nocompression:
>
>  		++ctx->blkaddr;
>  		len -= count;
> +		ctx->comprlimits -= count;
> +		if (!ctx->comprlimits)
> +			ctx->comprlimits = cfg.c_compr_seg_size;
>
>  		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
>  			const unsigned int qh_aligned =
> @@ -422,6 +425,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>  	ctx.head = ctx.tail = 0;
>  	ctx.clusterofs = 0;
>  	remaining = inode->i_size;
> +	ctx.comprlimits = cfg.c_compr_seg_size;
>
>  	while (remaining) {
>  		const u64 readcount = min_t(u64, remaining,
> diff --git a/lib/config.c b/lib/config.c
> index da0c260..1c39403 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -23,6 +23,7 @@ void erofs_init_configure(void)
>  	cfg.c_force_inodeversion = 0;
>  	cfg.c_inline_xattr_tolerance = 2;
>  	cfg.c_unix_timestamp = -1;
> +	cfg.c_compr_seg_size = 1024U * EROFS_BLKSIZ;

We don't need that limit by default, so
cfg.c_compr_segsize = -1ULL;

>  }
>
>  void erofs_show_config(void)
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 94bf1e6..036d818 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -61,6 +61,7 @@ static void usage(void)
>  	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
>  	      " -EX[,...]         X=extended options\n"
>  	      " -T#               set a fixed UNIX timestamp # to all files\n"
> +	      " -S#               set the max input stream size # to one compress\n"
>  	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
>  	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
>  #ifdef HAVE_LIBSELINUX
> @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  	char *endptr;
>  	int opt, i;
>
> -	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
> +	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
>  				 long_options, NULL)) != -1) {
>  		switch (opt) {
>  		case 'z':
> @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  				return -EINVAL;
>  			}
>  			break;
> +		case 'S':
> +			cfg.c_compr_seg_size = strtol(optarg, &endptr, 0);
> +			if (*endptr != '\0') {
> +				erofs_err("invalid compress segment size %s",
> +					  optarg);
> +				return -EINVAL;
> +			}
> +			if (cfg.c_compr_seg_size % EROFS_BLKSIZ != 0) {
> +				erofs_err("segment size:%u should be align to %u",
> +					  cfg.c_compr_seg_size, EROFS_BLKSIZ);
> +				return -EINVAL;
> +			}

if (!cfg.c_compr_segsize)
	cfg.c_compr_segsize = -1ULL;
else if (cfg.c_compr_segsize % EROFS_BLKSIZ) {
	erofs_err("segmentsize %u should be aligned with blocksize %u",
		  cfg.c_compr_seg_size, EROFS_BLKSIZ);
	return -EINVAL;
}

Thanks,
Gao Xiang


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v5] erofs-utils: introduce segment compression
  2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
  2020-06-18 23:05 ` Gao Xiang
@ 2020-06-19 17:51 ` Li Guifu via Linux-erofs
  2020-06-19 23:03   ` Gao Xiang
  2020-06-21 10:51   ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs
  1 sibling, 2 replies; 10+ messages in thread
From: Li Guifu via Linux-erofs @ 2020-06-19 17:51 UTC (permalink / raw)
  To: linux-erofs; +Cc: Li Guifu

Support segment compression, which separates files into several logical
units (segments), each of which is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
---
 include/erofs/config.h |  1 +
 lib/compress.c         | 16 ++++++++++++++--
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 16 +++++++++++++++-
 5 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..995664d 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -36,6 +36,7 @@ struct erofs_configure {
 	char *c_src_path;
 	char *c_compr_alg_master;
 	int c_compr_level_master;
+	u64 c_compr_seg_size;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
 	int c_inline_xattr_tolerance;
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..383ee00 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -158,13 +159,19 @@ static int vle_compress_one(struct erofs_inode *inode,
 	while (len) {
 		bool raw;
 
+		count = min_t(u64, len, ctx->segavail);
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+			goto nocompression;
+		}
+
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
 
-		count = len;
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -174,8 +181,9 @@ static int vle_compress_one(struct erofs_inode *inode,
 					  inode->i_srcpath,
 					  erofs_strerror(ret));
 			}
+			count = len;
 nocompression:
-			ret = write_uncompressed_block(ctx, &len, dst);
+			ret = write_uncompressed_block(ctx, &count, dst);
 			if (ret < 0)
 				return ret;
 			count = ret;
@@ -202,6 +210,9 @@ nocompression:
 
 		++ctx->blkaddr;
 		len -= count;
+		ctx->segavail -= count;
+		if (!ctx->segavail)
+			ctx->segavail = cfg.c_compr_seg_size;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
@@ -422,6 +433,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
 	remaining = inode->i_size;
+	ctx.segavail = cfg.c_compr_seg_size;
 
 	while (remaining) {
 		const u64 readcount = min_t(u64, remaining,
diff --git a/lib/config.c b/lib/config.c
index da0c260..de982e1 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_seg_size = UINT64_MAX;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..b12cb22 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output.
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
 .TP
+.BI "\-S " #
+Set the max input stream size at one compression. The default is unsigned 64bit MAX.
+It must be algin to EROFS block size(4096).
+.TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
 You may give multiple `--exclude-path' options.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..96cc053 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               set the max input stream size # at one compression\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0' || !cfg.c_compr_seg_size) {
+				erofs_err("invalid compress segment size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			if (cfg.c_compr_seg_size % EROFS_BLKSIZ) {
+				erofs_err("segment size:%"PRIu64" should be align to %u",
+					  cfg.c_compr_seg_size, EROFS_BLKSIZ);
+				return -EINVAL;
+			}
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5] erofs-utils: introduce segment compression
  2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs
@ 2020-06-19 23:03   ` Gao Xiang
  2020-06-21 10:51   ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs
  1 sibling, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2020-06-19 23:03 UTC (permalink / raw)
  To: Li Guifu; +Cc: linux-erofs

On Sat, Jun 20, 2020 at 01:51:33AM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression which seperates files in several logic
> units (segments) and each segment is compressed independently.
>
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
>
> Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
> ---
>  include/erofs/config.h |  1 +
>  lib/compress.c         | 16 ++++++++++++++--
>  lib/config.c           |  1 +
>  man/mkfs.erofs.1       |  4 ++++
>  mkfs/main.c            | 16 +++++++++++++++-
>  5 files changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 2f09749..995664d 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -36,6 +36,7 @@ struct erofs_configure {
>  	char *c_src_path;
>  	char *c_compr_alg_master;
>  	int c_compr_level_master;
> +	u64 c_compr_seg_size;

Could you please move this variable up a bit? Thanks.

char *c_compr_alg_master;
u64 c_compr_seg_size;
int c_compr_level_master;

>  	int c_force_inodeversion;
>  	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
>  	int c_inline_xattr_tolerance;
> diff --git a/lib/compress.c b/lib/compress.c
> index 6cc68ed..383ee00 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
>
>  	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
>  	u16 clusterofs;
> +	u64 segavail;
>  };
>
>  #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
> @@ -158,13 +159,19 @@ static int vle_compress_one(struct erofs_inode *inode,
>  	while (len) {
>  		bool raw;

unsigned int limit;

>
> +		count = min_t(u64, len, ctx->segavail);

kill this line.

> +		if (ctx->segavail <= EROFS_BLKSIZ) {
> +			if (len < ctx->segavail && !final)
> +				break;

limit = ctx->segavail;

> +			goto nocompression;
> +		}
> +
>  		if (len <= EROFS_BLKSIZ) {
>  			if (final)
>  				goto nocompression;
>  			break;
>  		}
>
> -		count = len;

count = min_t(u64, len, ctx->segavail);

>  		ret = erofs_compress_destsize(h, compressionlevel,
>  					      ctx->queue + ctx->head,
>  					      &count, dst, EROFS_BLKSIZ);
> @@ -174,8 +181,9 @@ static int vle_compress_one(struct erofs_inode *inode,
>  					  inode->i_srcpath,
>  					  erofs_strerror(ret));
>  			}
> +			count = len;

kill this line and add limit = EROFS_BLKSIZ;

>  nocompression:
> -			ret = write_uncompressed_block(ctx, &len, dst);
> +			ret = write_uncompressed_block(ctx, &count, dst);

ret = write_uncompressed_block(ctx, &count, limit, dst);

and update write_uncompressed_block as well.

>  			if (ret < 0)
>  				return ret;
>  			count = ret;
> @@ -202,6 +210,9 @@ nocompression:
>
>  		++ctx->blkaddr;
>  		len -= count;
> +		ctx->segavail -= count;
> +		if (!ctx->segavail)
> +			ctx->segavail = cfg.c_compr_seg_size;
>
>  		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
>  			const unsigned int qh_aligned =
> @@ -422,6 +433,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>  	ctx.head = ctx.tail = 0;
>  	ctx.clusterofs = 0;
>  	remaining = inode->i_size;
> +	ctx.segavail = cfg.c_compr_seg_size;
>
>  	while (remaining) {
>  		const u64 readcount = min_t(u64, remaining,
> diff --git a/lib/config.c b/lib/config.c
> index da0c260..de982e1 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -23,6 +23,7 @@ void erofs_init_configure(void)
>  	cfg.c_force_inodeversion = 0;
>  	cfg.c_inline_xattr_tolerance = 2;
>  	cfg.c_unix_timestamp = -1;
> +	cfg.c_compr_seg_size = UINT64_MAX;

cfg.c_compr_seg_size = -1;

since it is a very simple way to assign the maximum value of an unsigned
type by implicit sign extension, without having to care about the specific
data type.

>  }
>
>  void erofs_show_config(void)
> diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
> index 891c5a8..b12cb22 100644
> --- a/man/mkfs.erofs.1
> +++ b/man/mkfs.erofs.1
> @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output.
>  Set all files to the given UNIX timestamp. Reproducible builds requires setting
>  all to a specific one.
>  .TP
> +.BI "\-S " #
> +Set the max input stream size at one compression. The default is unsigned 64bit MAX.
> +It must be algin to EROFS block size(4096).

it's hard for end users to type "max 64-bit unsigned value"...

I'd suggest "Set max input stream size for each individual segment (disabled if 0).
The default value is 0. It should be aligned with blocksize."

> +.TP
>  .BI "\-\-exclude-path=" path
>  Ignore file that matches the exact literal path.
>  You may give multiple `--exclude-path' options.
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 94bf1e6..96cc053 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -61,6 +61,7 @@ static void usage(void)
>  	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
>  	      " -EX[,...]         X=extended options\n"
>  	      " -T#               set a fixed UNIX timestamp # to all files\n"
> +	      " -S#               set the max input stream size # at one compression\n"

-S#               Set max input stream size # for each individual segment\n

>  	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
>  	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
>  #ifdef HAVE_LIBSELINUX
> @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  	char *endptr;
>  	int opt, i;
>
> -	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
> +	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
>  				 long_options, NULL)) != -1) {
>  		switch (opt) {
>  		case 'z':
> @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>  				return -EINVAL;
>  			}
>  			break;
> +		case 'S':
> +			cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0);
> +			if (*endptr != '\0' || !cfg.c_compr_seg_size) {

Disable this if cfg.c_compr_seg_size == 0

> +				erofs_err("invalid compress segment size %s",
> +					  optarg);
> +				return -EINVAL;
> +			}
> +			if (cfg.c_compr_seg_size % EROFS_BLKSIZ) {
> +				erofs_err("segment size:%"PRIu64" should be align to %u",


Could you follow my advice in the previous reply?
Although I'm not good at English, I don't think the above message is _reasonable_.

Thanks,
Gao Xiang

> +					  cfg.c_compr_seg_size, EROFS_BLKSIZ);
> +				return -EINVAL;
> +			}
> +			break;
>  		case 2:
>  			opt = erofs_parse_exclude_path(optarg, false);
>  			if (opt) {
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v6] erofs-utils: introduce segment limits compression
  2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs
  2020-06-19 23:03   ` Gao Xiang
@ 2020-06-21 10:51   ` Li Guifu via Linux-erofs
  2020-06-21 12:27     ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
  1 sibling, 1 reply; 10+ messages in thread
From: Li Guifu via Linux-erofs @ 2020-06-21 10:51 UTC (permalink / raw)
  To: linux-erofs; +Cc: Li Guifu

Support segment compression, which separates files into several logical
units (segments), each of which is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
---
Changes since v3, as suggested by Gao Xiang <hsiangkao@gmx.com>:
 - add a limits variable to pass the size limit to write_uncompressed_block
 - make comments more readable

 include/erofs/config.h |  1 +
 lib/compress.c         | 29 +++++++++++++++++++++--------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 18 +++++++++++++++++-
 5 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..e5f1bfb 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,7 @@ struct erofs_configure {
 	char *c_img_path;
 	char *c_src_path;
 	char *c_compr_alg_master;
+	u64 c_compr_seg_size;
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..8a79895 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -116,23 +117,21 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 }
 
 static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
-				    unsigned int *len,
+				    unsigned int *len, unsigned int limits,
 				    char *dst)
 {
 	int ret;
-	unsigned int count;
+	unsigned int count = min(limits, *len); /* write uncompressed data */
 
 	/* reset clusterofs to 0 if permitted */
 	if (!erofs_sb_has_lz4_0padding() &&
 	    ctx->head >= ctx->clusterofs) {
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
+		count += ctx->clusterofs;
 		ctx->clusterofs = 0;
 	}
 
-	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
-
 	memcpy(dst, ctx->queue + ctx->head, count);
 	memset(dst + count, 0, EROFS_BLKSIZ - count);
 
@@ -157,14 +156,22 @@ static int vle_compress_one(struct erofs_inode *inode,
 
 	while (len) {
 		bool raw;
+		unsigned int limits = EROFS_BLKSIZ;
+
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+
+			limits = ctx->segavail;
+			goto nocompression;
+		}
 
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
-
-		count = len;
+		count = min_t(u64, len, ctx->segavail);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -175,7 +182,7 @@ static int vle_compress_one(struct erofs_inode *inode,
 					  erofs_strerror(ret));
 			}
 nocompression:
-			ret = write_uncompressed_block(ctx, &len, dst);
+			ret = write_uncompressed_block(ctx, &len, limits, dst);
 			if (ret < 0)
 				return ret;
 			count = ret;
@@ -203,6 +210,11 @@ nocompression:
 		++ctx->blkaddr;
 		len -= count;
 
+		if (count >= ctx->segavail)
+			ctx->segavail = cfg.c_compr_seg_size;
+		else
+			ctx->segavail -= count;
+
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
 				round_down(ctx->head, EROFS_BLKSIZ);
@@ -422,6 +434,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
 	remaining = inode->i_size;
+	ctx.segavail = cfg.c_compr_seg_size;
 
 	while (remaining) {
 		const u64 readcount = min_t(u64, remaining,
diff --git a/lib/config.c b/lib/config.c
index da0c260..fbb2914 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_seg_size = -1UL;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..0b613e4 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output.
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
 .TP
+.BI "\-S " #
+Set max input stream size for each individual segment (disabled if 0).
+The default value is 0. It should be aligned with blocksize.
+.TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
 You may give multiple `--exclude-path' options.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..0e26f4f 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               Set max input stream size # for each individual segment\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,21 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid compress segment size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			if (!cfg.c_compr_seg_size) {
+				cfg.c_compr_seg_size = -1UL;
+			} else if (cfg.c_compr_seg_size % EROFS_BLKSIZ) {
+				erofs_err("segment size:%"PRIu64" should be align to %u",
+					  cfg.c_compr_seg_size, EROFS_BLKSIZ);
+				return -EINVAL;
+			}
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v8] erofs-utils: introduce segment compression
  2020-06-21 10:51   ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs
@ 2020-06-21 12:27     ` Li Guifu via Linux-erofs
  2020-06-21 14:37       ` Gao Xiang via Linux-erofs
  0 siblings, 1 reply; 10+ messages in thread
From: Li Guifu via Linux-erofs @ 2020-06-21 12:27 UTC (permalink / raw)
  To: linux-erofs; +Cc: Li Guifu

Support segment compression, which separates files into several logical
units (segments), each of which is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
---
 include/erofs/config.h |  1 +
 lib/compress.c         | 47 ++++++++++++++++++++++++++----------------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 18 +++++++++++++++-
 5 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..e5f1bfb 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,7 @@ struct erofs_configure {
 	char *c_img_path;
 	char *c_src_path;
 	char *c_compr_alg_master;
+	u64 c_compr_seg_size;
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..6c0708d 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -116,23 +117,11 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 }
 
 static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
-				    unsigned int *len,
-				    char *dst)
+				    unsigned int count, char *dst)
 {
 	int ret;
-	unsigned int count;
-
-	/* reset clusterofs to 0 if permitted */
-	if (!erofs_sb_has_lz4_0padding() &&
-	    ctx->head >= ctx->clusterofs) {
-		ctx->head -= ctx->clusterofs;
-		*len += ctx->clusterofs;
-		ctx->clusterofs = 0;
-	}
-
-	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
 
+	DBG_BUGON(count > EROFS_BLKSIZ);
 	memcpy(dst, ctx->queue + ctx->head, count);
 	memset(dst + count, 0, EROFS_BLKSIZ - count);
 
@@ -157,14 +146,22 @@ static int vle_compress_one(struct erofs_inode *inode,
 
 	while (len) {
 		bool raw;
+		unsigned int limit = EROFS_BLKSIZ;
+
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+
+			limit = ctx->segavail;
+			goto nocompression;
+		}
 
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
-
-		count = len;
+		count = min_t(u64, len, ctx->segavail);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -175,10 +172,18 @@ static int vle_compress_one(struct erofs_inode *inode,
 					  erofs_strerror(ret));
 			}
 nocompression:
-			ret = write_uncompressed_block(ctx, &len, dst);
+			/* reset clusterofs to 0 if permitted */
+			if (!erofs_sb_has_lz4_0padding() &&
+			    ctx->head >= ctx->clusterofs) {
+				ctx->head -= ctx->clusterofs;
+				len += ctx->clusterofs;
+				limit += ctx->clusterofs;
+				ctx->clusterofs = 0;
+			}
+			count = min(limit, len);
+			ret = write_uncompressed_block(ctx, count, dst);
 			if (ret < 0)
 				return ret;
-			count = ret;
 			raw = true;
 		} else {
 			/* write compressed data */
@@ -203,6 +208,11 @@ nocompression:
 		++ctx->blkaddr;
 		len -= count;
 
+		if (count >= ctx->segavail)
+			ctx->segavail = cfg.c_compr_seg_size;
+		else
+			ctx->segavail -= count;
+
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
 				round_down(ctx->head, EROFS_BLKSIZ);
@@ -422,6 +432,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
 	remaining = inode->i_size;
+	ctx.segavail = cfg.c_compr_seg_size;
 
 	while (remaining) {
 		const u64 readcount = min_t(u64, remaining,
diff --git a/lib/config.c b/lib/config.c
index da0c260..721ff61 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_seg_size = -1;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..0b613e4 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output.
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
 .TP
+.BI "\-S " #
+Set max input stream size for each individual segment (disabled if 0).
+The default value is 0. It should be aligned with blocksize.
+.TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
 You may give multiple `--exclude-path' options.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..dcb01cc 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               Set max input stream size # for each individual segment\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,21 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid compress segment size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			if (!cfg.c_compr_seg_size) {
+				cfg.c_compr_seg_size = -1;
+			} else if (cfg.c_compr_seg_size % EROFS_BLKSIZ) {
+				erofs_err("segment size:%"PRIu64" should be align with %u",
+					  cfg.c_compr_seg_size, EROFS_BLKSIZ);
+				return -EINVAL;
+			}
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v8] erofs-utils: introduce segment compression
  2020-06-21 12:27     ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
@ 2020-06-21 14:37       ` Gao Xiang via Linux-erofs
  2020-06-30 17:27         ` [PATCH v10] " Li Guifu via Linux-erofs
  0 siblings, 1 reply; 10+ messages in thread
From: Gao Xiang via Linux-erofs @ 2020-06-21 14:37 UTC (permalink / raw)
  To: Li Guifu; +Cc: linux-erofs

On Sun, Jun 21, 2020 at 08:27:45PM +0800, Li Guifu via Linux-erofs wrote:

...

>  nocompression:
> -			ret = write_uncompressed_block(ctx, &len, dst);
> +			/* reset clusterofs to 0 if permitted */
> +			if (!erofs_sb_has_lz4_0padding() &&
> +			    ctx->head >= ctx->clusterofs) {
> +				ctx->head -= ctx->clusterofs;
> +				len += ctx->clusterofs;
> +				limit += ctx->clusterofs;
> +				ctx->clusterofs = 0;

TL;DR: it still seems buggy here.
Please rethink it carefully and then send a usable patch...

Thanks,
Gao Xiang


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v10] erofs-utils: introduce segment compression
  2020-06-21 14:37       ` Gao Xiang via Linux-erofs
@ 2020-06-30 17:27         ` Li Guifu via Linux-erofs
  2020-07-05  8:32           ` [PATCH v11] " Li Guifu via Linux-erofs
  0 siblings, 1 reply; 10+ messages in thread
From: Li Guifu via Linux-erofs @ 2020-06-30 17:27 UTC (permalink / raw)
  To: linux-erofs; +Cc: Li Guifu

Support segment compression which separates files into several logical
units (segments) and each segment is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
---
 include/erofs/config.h |  1 +
 lib/compress.c         | 29 +++++++++++++++++++++++------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 16 +++++++++++++++-
 5 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..e5f1bfb 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,7 @@ struct erofs_configure {
 	char *c_img_path;
 	char *c_src_path;
 	char *c_compr_alg_master;
+	u64 c_compr_seg_size;
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..2ea5809 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -116,7 +117,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 }
 
 static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
-				    unsigned int *len,
+				    unsigned int *len, unsigned int *ucomproft,
 				    char *dst)
 {
 	int ret;
@@ -125,14 +126,19 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
 	/* reset clusterofs to 0 if permitted */
 	if (!erofs_sb_has_lz4_0padding() &&
 	    ctx->head >= ctx->clusterofs) {
+		*ucomproft = ctx->clusterofs;
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
 		ctx->clusterofs = 0;
+		count = min(EROFS_BLKSIZ, *len);
+	} else {
+		*ucomproft = 0;
+		count = min_t(u64, ctx->segavail, *len);
+		if (count > EROFS_BLKSIZ)
+			count = EROFS_BLKSIZ;
 	}
 
 	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
-
 	memcpy(dst, ctx->queue + ctx->head, count);
 	memset(dst + count, 0, EROFS_BLKSIZ - count);
 
@@ -157,14 +163,21 @@ static int vle_compress_one(struct erofs_inode *inode,
 
 	while (len) {
 		bool raw;
+		unsigned int ucomproft = 0;
+
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+
+			goto nocompression;
+		}
 
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
-
-		count = len;
+		count = min_t(u64, len, ctx->segavail);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -175,7 +188,7 @@ static int vle_compress_one(struct erofs_inode *inode,
 					  erofs_strerror(ret));
 			}
 nocompression:
-			ret = write_uncompressed_block(ctx, &len, dst);
+			ret = write_uncompressed_block(ctx, &len, &ucomproft, dst);
 			if (ret < 0)
 				return ret;
 			count = ret;
@@ -202,6 +215,9 @@ nocompression:
 
 		++ctx->blkaddr;
 		len -= count;
+		ctx->segavail -= count - ucomproft;
+		if (!ctx->segavail)
+			ctx->segavail = cfg.c_compr_seg_size;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
@@ -422,6 +438,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
 	remaining = inode->i_size;
+	ctx.segavail = cfg.c_compr_seg_size;
 
 	while (remaining) {
 		const u64 readcount = min_t(u64, remaining,
diff --git a/lib/config.c b/lib/config.c
index da0c260..721ff61 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_seg_size = -1;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..2a4ef71 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output.
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
 .TP
+.BI "\-S " #
+Set max input stream size for each individual segment (disabled if 0).
+The default value is 0. It has to be aligned with blocksize.
+.TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
 You may give multiple `--exclude-path' options.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..cded973 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               Set max input stream size # for each individual segment\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			i = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid compress segment size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			if (!i) {
+				cfg.c_compr_seg_size = -1;
+			} else {
+				cfg.c_compr_seg_size = i * EROFS_BLKSIZ;
+			}
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v11] erofs-utils: introduce segment compression
  2020-06-30 17:27         ` [PATCH v10] " Li Guifu via Linux-erofs
@ 2020-07-05  8:32           ` Li Guifu via Linux-erofs
  2020-07-05 18:20             ` Gao Xiang via Linux-erofs
  0 siblings, 1 reply; 10+ messages in thread
From: Li Guifu via Linux-erofs @ 2020-07-05  8:32 UTC (permalink / raw)
  To: linux-erofs; +Cc: Li Guifu

Support segment compression which separates files into several logical
units (segments) and each segment is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
---
Changes from v10:
- change variable uncomprofs to clusterofs, which is only used
  when writing an uncompressed block

 include/erofs/config.h |  1 +
 lib/compress.c         | 30 ++++++++++++++++++++++++------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 16 +++++++++++++++-
 5 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..e5f1bfb 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,7 @@ struct erofs_configure {
 	char *c_img_path;
 	char *c_src_path;
 	char *c_compr_alg_master;
+	u64 c_compr_seg_size;
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..a2a278c 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -116,7 +117,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 }
 
 static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
-				    unsigned int *len,
+				    unsigned int *len, unsigned int *clusterofs,
 				    char *dst)
 {
 	int ret;
@@ -125,14 +126,19 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
 	/* reset clusterofs to 0 if permitted */
 	if (!erofs_sb_has_lz4_0padding() &&
 	    ctx->head >= ctx->clusterofs) {
+		*clusterofs = ctx->clusterofs;
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
 		ctx->clusterofs = 0;
+		count = min(EROFS_BLKSIZ, *len);
+	} else {
+		*clusterofs = 0;
+		count = min_t(u64, ctx->segavail, *len);
+		if (count > EROFS_BLKSIZ)
+			count = EROFS_BLKSIZ;
 	}
 
 	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
-
 	memcpy(dst, ctx->queue + ctx->head, count);
 	memset(dst + count, 0, EROFS_BLKSIZ - count);
 
@@ -157,14 +163,21 @@ static int vle_compress_one(struct erofs_inode *inode,
 
 	while (len) {
 		bool raw;
+		unsigned int clusterofs;
+
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+
+			goto nocompression;
+		}
 
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
-
-		count = len;
+		count = min_t(u64, len, ctx->segavail);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -175,11 +188,12 @@ static int vle_compress_one(struct erofs_inode *inode,
 					  erofs_strerror(ret));
 			}
 nocompression:
-			ret = write_uncompressed_block(ctx, &len, dst);
+			ret = write_uncompressed_block(ctx, &len, &clusterofs, dst);
 			if (ret < 0)
 				return ret;
 			count = ret;
 			raw = true;
+			ctx->segavail -= count - clusterofs;
 		} else {
 			/* write compressed data */
 			erofs_dbg("Writing %u compressed data to block %u",
@@ -194,6 +208,7 @@ nocompression:
 			if (ret)
 				return ret;
 			raw = false;
+			ctx->segavail -= count;
 		}
 
 		ctx->head += count;
@@ -202,6 +217,8 @@ nocompression:
 
 		++ctx->blkaddr;
 		len -= count;
+		if (!ctx->segavail)
+			ctx->segavail = cfg.c_compr_seg_size;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
@@ -422,6 +439,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
 	remaining = inode->i_size;
+	ctx.segavail = cfg.c_compr_seg_size;
 
 	while (remaining) {
 		const u64 readcount = min_t(u64, remaining,
diff --git a/lib/config.c b/lib/config.c
index da0c260..721ff61 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_seg_size = -1;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..2a4ef71 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to output.
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
 .TP
+.BI "\-S " #
+Set max input stream size for each individual segment (disabled if 0).
+The default value is 0. It has to be aligned with blocksize.
+.TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
 You may give multiple `--exclude-path' options.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..cded973 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               Set max input stream size # for each individual segment\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			i = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid compress segment size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			if (!i) {
+				cfg.c_compr_seg_size = -1;
+			} else {
+				cfg.c_compr_seg_size = i * EROFS_BLKSIZ;
+			}
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v11] erofs-utils: introduce segment compression
  2020-07-05  8:32           ` [PATCH v11] " Li Guifu via Linux-erofs
@ 2020-07-05 18:20             ` Gao Xiang via Linux-erofs
  0 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang via Linux-erofs @ 2020-07-05 18:20 UTC (permalink / raw)
  To: Li Guifu, Li Guifu; +Cc: linux-erofs

Hi Guifu,

On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression which separates files into several logical
> units (segments) and each segment is compressed independently.
> 
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
> 
> Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
> ---
> Changes from v10:
> - change variable uncomprofs to clusterofs, which is only used
>   when writing an uncompressed block

Could you please test the following patch if you're available?
Does it work?

From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001
From: Li Guifu <bluce.lee@aliyun.com>
Date: Sun, 5 Jul 2020 16:32:30 +0800
Subject: [PATCH v12] erofs-utils: introduce segment compression

Support segment compression which separates files into several logical
units (segments) and each segment is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 include/erofs/config.h |  2 ++
 lib/compress.c         | 38 ++++++++++++++++++++++++++++++--------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 12 +++++++++++-
 5 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..b149633 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,8 @@ struct erofs_configure {
 	char *c_img_path;
 	char *c_src_path;
 	char *c_compr_alg_master;
+	u64 c_compr_segsize;
+
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..4216fa7 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
 
 	/* reset clusterofs to 0 if permitted */
 	if (!erofs_sb_has_lz4_0padding() &&
-	    ctx->head >= ctx->clusterofs) {
+	    ctx->clusterofs && ctx->head >= ctx->clusterofs) {
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
+
+		ctx->segavail += ctx->clusterofs;
+		DBG_BUGON(ctx->segavail > cfg.c_compr_segsize);
+
+		DBG_BUGON(ctx->segavail < EROFS_BLKSIZ);
+		/* so only *len will be the candidate instead of segavail */
+		count = *len;
+
 		ctx->clusterofs = 0;
+	} else {
+		count = min_t(u64, ctx->segavail, *len);
 	}
 
-	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
+	if (count > EROFS_BLKSIZ)
+		count = EROFS_BLKSIZ;
 
+	/* fill zero if the uncompressed block isn't full */
 	memcpy(dst, ctx->queue + ctx->head, count);
 	memset(dst + count, 0, EROFS_BLKSIZ - count);
 
 	erofs_dbg("Writing %u uncompressed data to block %u",
 		  count, ctx->blkaddr);
 	ret = blk_write(dst, ctx->blkaddr, 1);
-	if (ret)
-		return ret;
-	return count;
+	return ret ? ret : count;
 }
 
 static int vle_compress_one(struct erofs_inode *inode,
@@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode,
 	while (len) {
 		bool raw;
 
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+
+			goto nocompression;
+		}
+
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
 
-		count = len;
+		count = min_t(u64, len, ctx->segavail);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -195,8 +212,12 @@ nocompression:
 				return ret;
 			raw = false;
 		}
-
 		ctx->head += count;
+		DBG_BUGON(ctx->segavail < count);
+		ctx->segavail -= count;
+		if (!ctx->segavail)
+			ctx->segavail = cfg.c_compr_segsize;
+
 		/* write compression indexes for this blkaddr */
 		vle_write_indexes(ctx, count, raw);
 
@@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
+	ctx.segavail = cfg.c_compr_segsize;
 	remaining = inode->i_size;
 
 	while (remaining) {
diff --git a/lib/config.c b/lib/config.c
index da0c260..9d4bea1 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_segsize = -1;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..8d0fc10 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output.
 Forcely generate extended inodes (64-byte inodes) to output.
 .RE
 .TP
+.BI "\-S " #
+Set maximum blocks for each individual compress segment.
+The default is 0 (disabled).
+.TP
 .BI "\-T " #
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..0265ae9 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               Set maximum blocks for each individual compress segment\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			i = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid blocks per compress segment %s",
+					  optarg);
+				return -EINVAL;
+			}
+			cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1;
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.24.0




^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, back to index

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
2020-06-18 23:05 ` Gao Xiang
2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs
2020-06-19 23:03   ` Gao Xiang
2020-06-21 10:51   ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs
2020-06-21 12:27     ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
2020-06-21 14:37       ` Gao Xiang via Linux-erofs
2020-06-30 17:27         ` [PATCH v10] " Li Guifu via Linux-erofs
2020-07-05  8:32           ` [PATCH v11] " Li Guifu via Linux-erofs
2020-07-05 18:20             ` Gao Xiang via Linux-erofs

Linux-EROFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-erofs/0 linux-erofs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-erofs linux-erofs/ https://lore.kernel.org/linux-erofs \
		linux-erofs@lists.ozlabs.org linux-erofs@ozlabs.org
	public-inbox-index linux-erofs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.ozlabs.lists.linux-erofs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git