All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gao Xiang via Linux-erofs <linux-erofs@lists.ozlabs.org>
To: Li Guifu <bluce.lee@aliyun.com>, Li Guifu <bluce.liguifu@huawei.com>
Cc: linux-erofs@lists.ozlabs.org
Subject: Re: [PATCH v11] erofs-utils: introduce segment compression
Date: Mon, 6 Jul 2020 02:20:50 +0800	[thread overview]
Message-ID: <20200705182049.GA20632@hsiangkao-HP-ZHAN-66-Pro-G1> (raw)
In-Reply-To: <20200705083230.5027-1-bluce.lee@aliyun.com>

Hi Guifu,

On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression which separates files into several logical
> units (segments), each of which is compressed independently.
> 
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
> 
> Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
> ---
> Changes from v10
> - change variable uncomprofs to clusterofs, which is only used
>   when writing an uncompressed block

Could you please test the following patch if you're available?
Does it work?

From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001
From: Li Guifu <bluce.lee@aliyun.com>
Date: Sun, 5 Jul 2020 16:32:30 +0800
Subject: [PATCH v12] erofs-utils: introduce segment compression

Support segment compression which separates files into several logical
units (segments), each of which is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 include/erofs/config.h |  2 ++
 lib/compress.c         | 38 ++++++++++++++++++++++++++++++--------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 12 +++++++++++-
 5 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..b149633 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,8 @@ struct erofs_configure {
 	char *c_img_path;
 	char *c_src_path;
 	char *c_compr_alg_master;
+	u64 c_compr_segsize;
+
 	int c_compr_level_master;
 	int c_force_inodeversion;
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..4216fa7 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
 	erofs_blk_t blkaddr;	/* pointing to the next blkaddr */
 	u16 clusterofs;
+	u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	\
@@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
 
 	/* reset clusterofs to 0 if permitted */
 	if (!erofs_sb_has_lz4_0padding() &&
-	    ctx->head >= ctx->clusterofs) {
+	    ctx->clusterofs && ctx->head >= ctx->clusterofs) {
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
+
+		ctx->segavail += ctx->clusterofs;
+		DBG_BUGON(ctx->segavail > cfg.c_compr_segsize);
+
+		DBG_BUGON(ctx->segavail < EROFS_BLKSIZ);
+		/* so only *len will be the candidate instead of segavail */
+		count = *len;
+
 		ctx->clusterofs = 0;
+	} else {
+		count = min_t(u64, ctx->segavail, *len);
 	}
 
-	/* write uncompressed data */
-	count = min(EROFS_BLKSIZ, *len);
+	if (count > EROFS_BLKSIZ)
+		count = EROFS_BLKSIZ;
 
+	/* fill zero if the uncompressed block isn't full */
 	memcpy(dst, ctx->queue + ctx->head, count);
 	memset(dst + count, 0, EROFS_BLKSIZ - count);
 
 	erofs_dbg("Writing %u uncompressed data to block %u",
 		  count, ctx->blkaddr);
 	ret = blk_write(dst, ctx->blkaddr, 1);
-	if (ret)
-		return ret;
-	return count;
+	return ret ? ret : count;
 }
 
 static int vle_compress_one(struct erofs_inode *inode,
@@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode,
 	while (len) {
 		bool raw;
 
+		if (ctx->segavail <= EROFS_BLKSIZ) {
+			if (len < ctx->segavail && !final)
+				break;
+
+			goto nocompression;
+		}
+
 		if (len <= EROFS_BLKSIZ) {
 			if (final)
 				goto nocompression;
 			break;
 		}
 
-		count = len;
+		count = min_t(u64, len, ctx->segavail);
 		ret = erofs_compress_destsize(h, compressionlevel,
 					      ctx->queue + ctx->head,
 					      &count, dst, EROFS_BLKSIZ);
@@ -195,8 +212,12 @@ nocompression:
 				return ret;
 			raw = false;
 		}
-
 		ctx->head += count;
+		DBG_BUGON(ctx->segavail < count);
+		ctx->segavail -= count;
+		if (!ctx->segavail)
+			ctx->segavail = cfg.c_compr_segsize;
+
 		/* write compression indexes for this blkaddr */
 		vle_write_indexes(ctx, count, raw);
 
@@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
+	ctx.segavail = cfg.c_compr_segsize;
 	remaining = inode->i_size;
 
 	while (remaining) {
diff --git a/lib/config.c b/lib/config.c
index da0c260..9d4bea1 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
 	cfg.c_force_inodeversion = 0;
 	cfg.c_inline_xattr_tolerance = 2;
 	cfg.c_unix_timestamp = -1;
+	cfg.c_compr_segsize = -1;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..8d0fc10 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output.
 Forcely generate extended inodes (64-byte inodes) to output.
 .RE
 .TP
+.BI "\-S " #
+Set maximum blocks for each individual compress segment.
+The default is 0 (disabled).
+.TP
 .BI "\-T " #
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..0265ae9 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
 	      " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
 	      " -EX[,...]         X=extended options\n"
 	      " -T#               set a fixed UNIX timestamp # to all files\n"
+	      " -S#               Set maximum blocks for each individual compress segment\n"
 	      " --exclude-path=X  avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 	char *endptr;
 	int opt, i;
 
-	while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+	while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
 				 long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'z':
@@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 				return -EINVAL;
 			}
 			break;
+		case 'S':
+			i = strtoll(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid blocks per compress segment %s",
+					  optarg);
+				return -EINVAL;
+			}
+			cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1;
+			break;
 		case 2:
 			opt = erofs_parse_exclude_path(optarg, false);
 			if (opt) {
-- 
2.24.0




  reply	other threads:[~2020-07-05 18:21 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
2020-06-18 23:05 ` Gao Xiang
2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs
2020-06-19 23:03   ` Gao Xiang
2020-06-21 10:51   ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs
2020-06-21 12:27     ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
2020-06-21 14:37       ` Gao Xiang via Linux-erofs
2020-06-30 17:27         ` [PATCH v10] " Li Guifu via Linux-erofs
2020-07-05  8:32           ` [PATCH v11] " Li Guifu via Linux-erofs
2020-07-05 18:20             ` Gao Xiang via Linux-erofs [this message]
2020-10-07 15:04               ` Gao Xiang via Linux-erofs

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200705182049.GA20632@hsiangkao-HP-ZHAN-66-Pro-G1 \
    --to=linux-erofs@lists.ozlabs.org \
    --cc=bluce.lee@aliyun.com \
    --cc=bluce.liguifu@huawei.com \
    --cc=hsiangkao@aol.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.