From: Gao Xiang via Linux-erofs <linux-erofs@lists.ozlabs.org>
To: Li Guifu <bluce.lee@aliyun.com>, Li Guifu <bluce.liguifu@huawei.com>
Cc: linux-erofs@lists.ozlabs.org
Subject: Re: [PATCH v11] erofs-utils: introduce segment compression
Date: Mon, 6 Jul 2020 02:20:50 +0800 [thread overview]
Message-ID: <20200705182049.GA20632@hsiangkao-HP-ZHAN-66-Pro-G1> (raw)
In-Reply-To: <20200705083230.5027-1-bluce.lee@aliyun.com>
Hi Guifu,
On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression which separates files into several logical
> units (segments) and each segment is compressed independently.
>
> Advantages:
> - more friendly for data differencing;
> - it can also be used for parallel compression in the same file later.
>
> Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
> ---
> Changes from v10
> - change variable uncomprofs to clusterofs, which is only used
> when writing an uncompressed block
Could you please test the following patch if you're available?
Does it work?
From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001
From: Li Guifu <bluce.lee@aliyun.com>
Date: Sun, 5 Jul 2020 16:32:30 +0800
Subject: [PATCH v12] erofs-utils: introduce segment compression
Support segment compression which separates files into several logical
units (segments) and each segment is compressed independently.
Advantages:
- more friendly for data differencing;
- it can also be used for parallel compression in the same file later.
Signed-off-by: Li Guifu <bluce.lee@aliyun.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
include/erofs/config.h | 2 ++
lib/compress.c | 38 ++++++++++++++++++++++++++++++--------
lib/config.c | 1 +
man/mkfs.erofs.1 | 4 ++++
mkfs/main.c | 12 +++++++++++-
5 files changed, 48 insertions(+), 9 deletions(-)
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..b149633 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,8 @@ struct erofs_configure {
char *c_img_path;
char *c_src_path;
char *c_compr_alg_master;
+ u64 c_compr_segsize;
+
int c_compr_level_master;
int c_force_inodeversion;
/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..4216fa7 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
erofs_blk_t blkaddr; /* pointing to the next blkaddr */
u16 clusterofs;
+ u64 segavail;
};
#define Z_EROFS_LEGACY_MAP_HEADER_SIZE \
@@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
/* reset clusterofs to 0 if permitted */
if (!erofs_sb_has_lz4_0padding() &&
- ctx->head >= ctx->clusterofs) {
+ ctx->clusterofs && ctx->head >= ctx->clusterofs) {
ctx->head -= ctx->clusterofs;
*len += ctx->clusterofs;
+
+ ctx->segavail += ctx->clusterofs;
+ DBG_BUGON(ctx->segavail > cfg.c_compr_segsize);
+
+ DBG_BUGON(ctx->segavail < EROFS_BLKSIZ);
+ /* so only *len will be the candidate instead of segavail */
+ count = *len;
+
ctx->clusterofs = 0;
+ } else {
+ count = min_t(u64, ctx->segavail, *len);
}
- /* write uncompressed data */
- count = min(EROFS_BLKSIZ, *len);
+ if (count > EROFS_BLKSIZ)
+ count = EROFS_BLKSIZ;
+ /* fill zero if the uncompressed block isn't full */
memcpy(dst, ctx->queue + ctx->head, count);
memset(dst + count, 0, EROFS_BLKSIZ - count);
erofs_dbg("Writing %u uncompressed data to block %u",
count, ctx->blkaddr);
ret = blk_write(dst, ctx->blkaddr, 1);
- if (ret)
- return ret;
- return count;
+ return ret ? ret : count;
}
static int vle_compress_one(struct erofs_inode *inode,
@@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode,
while (len) {
bool raw;
+ if (ctx->segavail <= EROFS_BLKSIZ) {
+ if (len < ctx->segavail && !final)
+ break;
+
+ goto nocompression;
+ }
+
if (len <= EROFS_BLKSIZ) {
if (final)
goto nocompression;
break;
}
- count = len;
+ count = min_t(u64, len, ctx->segavail);
ret = erofs_compress_destsize(h, compressionlevel,
ctx->queue + ctx->head,
&count, dst, EROFS_BLKSIZ);
@@ -195,8 +212,12 @@ nocompression:
return ret;
raw = false;
}
-
ctx->head += count;
+ DBG_BUGON(ctx->segavail < count);
+ ctx->segavail -= count;
+ if (!ctx->segavail)
+ ctx->segavail = cfg.c_compr_segsize;
+
/* write compression indexes for this blkaddr */
vle_write_indexes(ctx, count, raw);
@@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
ctx.head = ctx.tail = 0;
ctx.clusterofs = 0;
+ ctx.segavail = cfg.c_compr_segsize;
remaining = inode->i_size;
while (remaining) {
diff --git a/lib/config.c b/lib/config.c
index da0c260..9d4bea1 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
cfg.c_force_inodeversion = 0;
cfg.c_inline_xattr_tolerance = 2;
cfg.c_unix_timestamp = -1;
+ cfg.c_compr_segsize = -1;
}
void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..8d0fc10 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output.
Forcely generate extended inodes (64-byte inodes) to output.
.RE
.TP
+.BI "\-S " #
+Set maximum blocks for each individual compress segment.
+The default is 0 (disabled).
+.TP
.BI "\-T " #
Set all files to the given UNIX timestamp. Reproducible builds requires setting
all to a specific one.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..0265ae9 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
" -x# set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
" -EX[,...] X=extended options\n"
" -T# set a fixed UNIX timestamp # to all files\n"
+ " -S# Set maximum blocks for each individual compress segment\n"
" --exclude-path=X avoid including file X (X = exact literal path)\n"
" --exclude-regex=X avoid including files that match X (X = regular expression)\n"
#ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
char *endptr;
int opt, i;
- while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+ while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
long_options, NULL)) != -1) {
switch (opt) {
case 'z':
@@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
return -EINVAL;
}
break;
+ case 'S':
+ i = strtoll(optarg, &endptr, 0);
+ if (*endptr != '\0') {
+ erofs_err("invalid blocks per compress segment %s",
+ optarg);
+ return -EINVAL;
+ }
+ cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1;
+ break;
case 2:
opt = erofs_parse_exclude_path(optarg, false);
if (opt) {
--
2.24.0
next prev parent reply other threads:[~2020-07-05 18:21 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-06-18 16:26 [PATCH v4] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
2020-06-18 23:05 ` Gao Xiang
2020-06-19 17:51 ` [PATCH v5] " Li Guifu via Linux-erofs
2020-06-19 23:03 ` Gao Xiang
2020-06-21 10:51 ` [PATCH v6] erofs-utils: introduce segment limits compression Li Guifu via Linux-erofs
2020-06-21 12:27 ` [PATCH v8] erofs-utils: introduce segment compression Li Guifu via Linux-erofs
2020-06-21 14:37 ` Gao Xiang via Linux-erofs
2020-06-30 17:27 ` [PATCH v10] " Li Guifu via Linux-erofs
2020-07-05 8:32 ` [PATCH v11] " Li Guifu via Linux-erofs
2020-07-05 18:20 ` Gao Xiang via Linux-erofs [this message]
2020-10-07 15:04 ` Gao Xiang via Linux-erofs
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200705182049.GA20632@hsiangkao-HP-ZHAN-66-Pro-G1 \
--to=linux-erofs@lists.ozlabs.org \
--cc=bluce.lee@aliyun.com \
--cc=bluce.liguifu@huawei.com \
--cc=hsiangkao@aol.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).