Linux-EROFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2] erofs-utils: add a compress limit to source input stream
@ 2020-06-09 15:50 Li Guifu via Linux-erofs
  2020-06-09 18:18 ` Gao Xiang
  0 siblings, 1 reply; 2+ messages in thread
From: Li Guifu via Linux-erofs @ 2020-06-09 15:50 UTC (permalink / raw)
  To: linux-erofs; +Cc: Li Guifu

Compressing the source input stream without a limit causes differential
amplification when creating a binary diff image for an upgrade. Add a limit
to cut the compression, so that the amplification is bounded by the given size.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
---
changes since v1:
 - fix variable "readcount" to also take the minimum with comprlimits

 include/erofs/internal.h |  1 +
 lib/compress.c           | 22 +++++++++++++++++-----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 41da189..367c0b0 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -41,6 +41,7 @@ typedef unsigned short umode_t;
 
 #define EROFS_ISLOTBITS		5
 #define EROFS_SLOTSIZE		(1U << EROFS_ISLOTBITS)
+#define EROFS_COMPR_LIMITS	(1024U * EROFS_BLKSIZ)
 
 typedef u64 erofs_off_t;
 typedef u64 erofs_nid_t;
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..fe1cb09 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -150,7 +150,7 @@ static int vle_compress_one(struct erofs_inode *inode,
 {
 	struct erofs_compress *const h = &compresshandle;
 	unsigned int len = ctx->tail - ctx->head;
-	unsigned int count;
+	unsigned int count = 0;
 	int ret;
 	static char dstbuf[EROFS_BLKSIZ * 2];
 	char *const dst = dstbuf + EROFS_BLKSIZ;
@@ -159,7 +159,7 @@ static int vle_compress_one(struct erofs_inode *inode,
 		bool raw;
 
 		if (len <= EROFS_BLKSIZ) {
-			if (final)
+			if (!count || final)
 				goto nocompression;
 			break;
 		}
@@ -392,7 +392,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 {
 	struct erofs_buffer_head *bh;
 	struct z_erofs_vle_compress_ctx ctx;
-	erofs_off_t remaining;
+	erofs_off_t remaining, comprlimits;
 	erofs_blk_t blkaddr, compressed_blocks;
 	unsigned int legacymetasize;
 	int ret, fd;
@@ -422,10 +422,14 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
 	remaining = inode->i_size;
+	comprlimits = EROFS_COMPR_LIMITS;
 
 	while (remaining) {
-		const u64 readcount = min_t(u64, remaining,
-					    sizeof(ctx.queue) - ctx.tail);
+		const u64 readcount = min_t(u64,
+					     min_t(u64, remaining,
+						sizeof(ctx.queue) - ctx.tail),
+						comprlimits);
+
 
 		ret = read(fd, ctx.queue + ctx.tail, readcount);
 		if (ret != readcount) {
@@ -434,11 +438,19 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		}
 		remaining -= readcount;
 		ctx.tail += readcount;
+		comprlimits -= readcount;
 
+compr_continue:
 		/* do one compress round */
 		ret = vle_compress_one(inode, &ctx, false);
 		if (ret)
 			goto err_bdrop;
+		if (!comprlimits) {
+			if (ctx.head != ctx.tail)
+				goto compr_continue;
+			ctx.clusterofs = 0;
+			comprlimits = EROFS_COMPR_LIMITS;
+		}
 	}
 
 	/* do the final round */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] erofs-utils: add a compress limit to source input stream
  2020-06-09 15:50 [PATCH v2] erofs-utils: add a compress limit to source input stream Li Guifu via Linux-erofs
@ 2020-06-09 18:18 ` Gao Xiang
  0 siblings, 0 replies; 2+ messages in thread
From: Gao Xiang @ 2020-06-09 18:18 UTC (permalink / raw)
  To: Li Guifu; +Cc: linux-erofs

Hi Guifu,

On Tue, Jun 09, 2020 at 11:50:09PM +0800, Li Guifu via Linux-erofs wrote:
> Compressing the source input stream without a limit causes differential
> amplification when creating a binary diff image for an upgrade. Add a limit
> to cut the compression, so that the amplification is bounded by the given size.

Please try to write a better commit message... If we limit that by introducing
segment compression (e.g. a 4M segment size), we would get the following
benefits:
 - it is friendlier to block diff (please add more details about this);
 - it can also be used for parallel compression within the same file.

>
> Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
> ---
> changes since v1:
>  - fix variable "readcount" to also take the minimum with comprlimits
>
>  include/erofs/internal.h |  1 +
>  lib/compress.c           | 22 +++++++++++++++++-----
>  2 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/include/erofs/internal.h b/include/erofs/internal.h
> index 41da189..367c0b0 100644
> --- a/include/erofs/internal.h
> +++ b/include/erofs/internal.h
> @@ -41,6 +41,7 @@ typedef unsigned short umode_t;
>
>  #define EROFS_ISLOTBITS		5
>  #define EROFS_SLOTSIZE		(1U << EROFS_ISLOTBITS)
> +#define EROFS_COMPR_LIMITS	(1024U * EROFS_BLKSIZ)

Could we put it into cfg and add a new command line
argument for this? How about compress_segment_size?

>
>  typedef u64 erofs_off_t;
>  typedef u64 erofs_nid_t;
> diff --git a/lib/compress.c b/lib/compress.c
> index 6cc68ed..fe1cb09 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -150,7 +150,7 @@ static int vle_compress_one(struct erofs_inode *inode,
>  {
>  	struct erofs_compress *const h = &compresshandle;
>  	unsigned int len = ctx->tail - ctx->head;
> -	unsigned int count;
> +	unsigned int count = 0;
>  	int ret;
>  	static char dstbuf[EROFS_BLKSIZ * 2];
>  	char *const dst = dstbuf + EROFS_BLKSIZ;
> @@ -159,7 +159,7 @@ static int vle_compress_one(struct erofs_inode *inode,
>  		bool raw;
>
>  		if (len <= EROFS_BLKSIZ) {
> -			if (final)
> +			if (!count || final)

I think you could avoid this trick by adding a counter in
z_erofs_vle_compress_ctx or some other suitable place...

>  				goto nocompression;
>  			break;
>  		}
> @@ -392,7 +392,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>  {
>  	struct erofs_buffer_head *bh;
>  	struct z_erofs_vle_compress_ctx ctx;
> -	erofs_off_t remaining;
> +	erofs_off_t remaining, comprlimits;
>  	erofs_blk_t blkaddr, compressed_blocks;
>  	unsigned int legacymetasize;
>  	int ret, fd;
> @@ -422,10 +422,14 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>  	ctx.head = ctx.tail = 0;
>  	ctx.clusterofs = 0;
>  	remaining = inode->i_size;
> +	comprlimits = EROFS_COMPR_LIMITS;
>
>  	while (remaining) {
> -		const u64 readcount = min_t(u64, remaining,
> -					    sizeof(ctx.queue) - ctx.tail);
> +		const u64 readcount = min_t(u64,
> +					     min_t(u64, remaining,
> +						sizeof(ctx.queue) - ctx.tail),
> +						comprlimits);
> +

We don't need to limit I/O... it would become inefficient
when such limits are small...

>
>  		ret = read(fd, ctx.queue + ctx.tail, readcount);
>  		if (ret != readcount) {
> @@ -434,11 +438,19 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>  		}
>  		remaining -= readcount;
>  		ctx.tail += readcount;
> +		comprlimits -= readcount;
>
> +compr_continue:
>  		/* do one compress round */
>  		ret = vle_compress_one(inode, &ctx, false);
>  		if (ret)
>  			goto err_bdrop;
> +		if (!comprlimits) {
> +			if (ctx.head != ctx.tail)
> +				goto compr_continue;
> +			ctx.clusterofs = 0;
> +			comprlimits = EROFS_COMPR_LIMITS;

This could be clearer...

Thanks,
Gao Xiang

> +		}
>  	}
>
>  	/* do the final round */
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-09 15:50 [PATCH v2] erofs-utils: add a compress limit to source input stream Li Guifu via Linux-erofs
2020-06-09 18:18 ` Gao Xiang

Linux-EROFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-erofs/0 linux-erofs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-erofs linux-erofs/ https://lore.kernel.org/linux-erofs \
		linux-erofs@lists.ozlabs.org linux-erofs@ozlabs.org
	public-inbox-index linux-erofs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.ozlabs.lists.linux-erofs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git