linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] erofs-utils: introduce z_erofs_inmem_extent
@ 2022-09-06 11:40 ZiyangZhang
  2022-09-06 11:40 ` [PATCH 2/4] erofs-utils: lib: add rb-tree implementation ZiyangZhang
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: ZiyangZhang @ 2022-09-06 11:40 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Ziyang Zhang

From: Gao Xiang <hsiangkao@linux.alibaba.com>

In order to introduce deduplication for compressed pclusters,
a lookahead extent is recorded so that the following deduplication
process can adjust the previous extent on demand.

Also, in the future, it can be used for parallel compression
as well.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
---
 lib/compress.c | 87 ++++++++++++++++++++++++++++----------------------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index fd02053..3c1d9db 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -22,12 +22,19 @@
 static struct erofs_compress compresshandle;
 static unsigned int algorithmtype[2];
 
-struct z_erofs_vle_compress_ctx {
-	u8 *metacur;
+struct z_erofs_inmem_extent {
+	erofs_blk_t blkaddr;
+	unsigned int compressedblks;
+	unsigned int length;
+	bool raw;
+};
 
+struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
+	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
+
+	u8 *metacur;
 	unsigned int head, tail;
-	unsigned int compressedblks;
 	erofs_blk_t blkaddr;		/* pointing to the next blkaddr */
 	u16 clusterofs;
 };
@@ -43,7 +50,7 @@ static unsigned int vle_compressmeta_capacity(erofs_off_t filesize)
 	return Z_EROFS_LEGACY_MAP_HEADER_SIZE + indexsize;
 }
 
-static void vle_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
+static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
 {
 	const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN;
 	struct z_erofs_vle_decompressed_index di;
@@ -59,10 +66,10 @@ static void vle_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
 	ctx->metacur += sizeof(di);
 }
 
-static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
-			      unsigned int count, bool raw)
+static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 {
 	unsigned int clusterofs = ctx->clusterofs;
+	unsigned int count = ctx->e.length;
 	unsigned int d0 = 0, d1 = (clusterofs + count) / EROFS_BLKSIZ;
 	struct z_erofs_vle_decompressed_index di;
 	unsigned int type;
@@ -76,13 +83,13 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 		 * A lcluster cannot have three parts with the middle one which
 		 * is well-compressed for !ztailpacking cases.
 		 */
-		DBG_BUGON(!raw && !cfg.c_ztailpacking);
-		type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
+		DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking);
+		type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
 			Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
 		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
 
 		di.di_advise = advise;
-		di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+		di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
@@ -95,7 +102,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 		/* XXX: big pcluster feature should be per-inode */
 		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
-			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
+			di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else if (d0) {
@@ -119,9 +126,9 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 				di.di_u.delta[0] = cpu_to_le16(d0);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else {
-			type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
+			type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
 				Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
-			di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
 		}
 		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
 		di.di_advise = advise;
@@ -226,18 +233,16 @@ static int vle_compress_one(struct erofs_inode *inode,
 			    struct z_erofs_vle_compress_ctx *ctx,
 			    bool final)
 {
+	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
+	char *const dst = dstbuf + EROFS_BLKSIZ;
 	struct erofs_compress *const h = &compresshandle;
 	unsigned int len = ctx->tail - ctx->head;
-	unsigned int count;
 	int ret;
-	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
-	char *const dst = dstbuf + EROFS_BLKSIZ;
 
 	while (len) {
 		unsigned int pclustersize =
 			z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ;
 		bool may_inline = (cfg.c_ztailpacking && final);
-		bool raw;
 
 		if (len <= pclustersize) {
 			if (!final)
@@ -246,10 +251,11 @@ static int vle_compress_one(struct erofs_inode *inode,
 				goto nocompression;
 		}
 
-		count = min(len, cfg.c_max_decompressed_extent_bytes);
+		ctx->e.length = min(len,
+				cfg.c_max_decompressed_extent_bytes);
 		ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
-					      &count, dst, pclustersize,
-					      !(final && len == count));
+				&ctx->e.length, dst, pclustersize,
+				!(final && len == ctx->e.length));
 		if (ret <= 0) {
 			if (ret != -EAGAIN) {
 				erofs_err("failed to compress %s: %s",
@@ -267,17 +273,17 @@ nocompression:
 
 			if (ret < 0)
 				return ret;
-			count = ret;
+			ctx->e.length = ret;
 
 			/*
 			 * XXX: For now, we have to leave `ctx->compressedblks
 			 * = 1' since there is no way to generate compressed
 			 * indexes after the time that ztailpacking is decided.
 			 */
-			ctx->compressedblks = 1;
-			raw = true;
+			ctx->e.compressedblks = 1;
+			ctx->e.raw = true;
 		/* tailpcluster should be less than 1 block */
-		} else if (may_inline && len == count &&
+		} else if (may_inline && len == ctx->e.length &&
 			   ret < EROFS_BLKSIZ) {
 			if (ctx->clusterofs + len <= EROFS_BLKSIZ) {
 				inode->eof_tailraw = malloc(len);
@@ -292,19 +298,20 @@ nocompression:
 			ret = z_erofs_fill_inline_data(inode, dst, ret, false);
 			if (ret < 0)
 				return ret;
-			ctx->compressedblks = 1;
-			raw = false;
+			ctx->e.compressedblks = 1;
+			ctx->e.raw = false;
 		} else {
 			unsigned int tailused, padding;
 
-			if (may_inline && len == count)
+			if (may_inline && len == ctx->e.length)
 				tryrecompress_trailing(ctx->queue + ctx->head,
-						       &count, dst, &ret);
+						&ctx->e.length, dst, &ret);
 
 			tailused = ret & (EROFS_BLKSIZ - 1);
 			padding = 0;
-			ctx->compressedblks = BLK_ROUND_UP(ret);
-			DBG_BUGON(ctx->compressedblks * EROFS_BLKSIZ >= count);
+			ctx->e.compressedblks = BLK_ROUND_UP(ret);
+			DBG_BUGON(ctx->e.compressedblks * EROFS_BLKSIZ >=
+				  ctx->e.length);
 
 			/* zero out garbage trailing data for non-0padding */
 			if (!erofs_sb_has_lz4_0padding())
@@ -315,21 +322,22 @@ nocompression:
 
 			/* write compressed data */
 			erofs_dbg("Writing %u compressed data to %u of %u blocks",
-				  count, ctx->blkaddr, ctx->compressedblks);
+				  ctx->e.length, ctx->blkaddr,
+				  ctx->e.compressedblks);
 
 			ret = blk_write(dst - padding, ctx->blkaddr,
-					ctx->compressedblks);
+					ctx->e.compressedblks);
 			if (ret)
 				return ret;
-			raw = false;
+			ctx->e.raw = false;
 		}
+		/* write indexes for this pcluster */
+		ctx->e.blkaddr = ctx->blkaddr;
+		z_erofs_write_indexes(ctx);
 
-		ctx->head += count;
-		/* write compression indexes for this pcluster */
-		vle_write_indexes(ctx, count, raw);
-
-		ctx->blkaddr += ctx->compressedblks;
-		len -= count;
+		ctx->blkaddr += ctx->e.compressedblks;
+		ctx->head += ctx->e.length;
+		len -= ctx->e.length;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
@@ -654,6 +662,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
+	ctx.e.length = 0;
 	remaining = inode->i_size;
 
 	while (remaining) {
@@ -679,7 +688,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	DBG_BUGON(compressed_blocks < !!inode->idata_size);
 	compressed_blocks -= !!inode->idata_size;
 
-	vle_write_indexes_final(&ctx);
+	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
 	if (compressed_blocks * EROFS_BLKSIZ + inode->idata_size +
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-09-08  0:57 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-06 11:40 [PATCH 1/4] erofs-utils: introduce z_erofs_inmem_extent ZiyangZhang
2022-09-06 11:40 ` [PATCH 2/4] erofs-utils: lib: add rb-tree implementation ZiyangZhang
2022-09-06 11:40 ` [PATCH 3/4] erofs-utils: fuse: introduce partial-referenced pclusters ZiyangZhang
2022-09-06 11:40 ` [PATCH 4/4] erofs-utils: mkfs: introduce global compressed data deduplication ZiyangZhang
2022-09-07  5:55   ` Gao Xiang
2022-09-08  0:56     ` Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).