linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] erofs-utils: prepare for per-(sub)file compression strategies
@ 2021-05-10  7:22 Gao Xiang
  2021-05-10  7:23 ` [PATCH 1/4] erofs-utils: compress trailing data for big pcluster properly Gao Xiang
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Gao Xiang @ 2021-05-10  7:22 UTC (permalink / raw)
  To: linux-erofs, Li Guifu; +Cc: Gao Xiang

Hi all,

This patchset mainly provides a new helper, z_erofs_get_max_pclustersize(),
to prepare for per-file compression strategies, so that a valid pclustersize
can be returned according to the detailed data type or access patterns.

In order to do that, the compression header is now generated on a per-file
basis as well, which will also be needed for parallel compression in
the future.

Note that
https://lore.kernel.org/r/20210510064715.29123-1-xiang@kernel.org

should be applied after "erofs-utils: compress trailing data for big
pcluster properly" is used, otherwise some compress indexes won't be
parsed correctly.

Thanks,
Gao Xiang

Gao Xiang (4):
  erofs-utils: compress trailing data for big pcluster properly
  erofs-utils: reserve physical_clusterbits[]
  erofs-utils: prepare for per-(sub)file compress strategies
  erofs-utils: sync up z_erofs_get_extent_compressedlen()

 include/erofs/internal.h |  1 -
 lib/compress.c           | 91 +++++++++++++++++++++++++++-------------
 lib/compressor.c         |  5 ++-
 lib/zmap.c               | 53 +++++++++++++----------
 4 files changed, 96 insertions(+), 54 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] erofs-utils: compress trailing data for big pcluster properly
  2021-05-10  7:22 [PATCH 0/4] erofs-utils: prepare for per-(sub)file compression strategies Gao Xiang
@ 2021-05-10  7:23 ` Gao Xiang
  2021-05-10  7:23 ` [PATCH 2/4] erofs-utils: reserve physical_clusterbits[] Gao Xiang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2021-05-10  7:23 UTC (permalink / raw)
  To: linux-erofs, Li Guifu; +Cc: Gao Xiang

Compress with a smaller pcluster size if the trailing data isn't enough
to fill the maximum pcluster size, instead of leaving the trailing data
uncompressed.

Fixes: b71dc92df6f1 ("erofs-utils: mkfs: support multiple block compression")
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 lib/compress.c   | 11 ++++++++---
 lib/compressor.c |  5 ++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index deef6a2c8ae3..e146416890f0 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -166,9 +166,12 @@ static int vle_compress_one(struct erofs_inode *inode,
 		bool raw;
 
 		if (len <= pclustersize) {
-			if (final)
-				goto nocompression;
-			break;
+			if (final) {
+				if (len <= EROFS_BLKSIZ)
+					goto nocompression;
+			} else {
+				break;
+			}
 		}
 
 		count = len;
@@ -195,6 +198,8 @@ nocompression:
 					EROFS_BLKSIZ - tailused : 0;
 
 			ctx->compressedblks = DIV_ROUND_UP(ret, EROFS_BLKSIZ);
+			DBG_BUGON(ctx->compressedblks * EROFS_BLKSIZ >= count);
+
 			/* zero out garbage trailing data for non-0padding */
 			if (!erofs_sb_has_lz4_0padding())
 				memset(dst + ret, 0,
diff --git a/lib/compressor.c b/lib/compressor.c
index b2434e0e5418..8836e0c785ba 100644
--- a/lib/compressor.c
+++ b/lib/compressor.c
@@ -28,6 +28,7 @@ int erofs_compress_destsize(struct erofs_compress *c,
 			    void *dst,
 			    unsigned int dstsize)
 {
+	unsigned uncompressed_size;
 	int ret;
 
 	DBG_BUGON(!c->alg);
@@ -40,7 +41,9 @@ int erofs_compress_destsize(struct erofs_compress *c,
 		return ret;
 
 	/* check if there is enough gains to compress */
-	if (*srcsize <= dstsize * c->compress_threshold / 100)
+	uncompressed_size = *srcsize;
+	if (roundup(ret, EROFS_BLKSIZ) >= uncompressed_size *
+	    c->compress_threshold / 100)
 		return -EAGAIN;
 	return ret;
 }
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] erofs-utils: reserve physical_clusterbits[]
  2021-05-10  7:22 [PATCH 0/4] erofs-utils: prepare for per-(sub)file compression strategies Gao Xiang
  2021-05-10  7:23 ` [PATCH 1/4] erofs-utils: compress trailing data for big pcluster properly Gao Xiang
@ 2021-05-10  7:23 ` Gao Xiang
  2021-05-10  7:23 ` [PATCH 3/4] erofs-utils: prepare for per-(sub)file compress strategies Gao Xiang
  2021-05-10  7:23 ` [PATCH 4/4] erofs-utils: sync up z_erofs_get_extent_compressedlen() Gao Xiang
  3 siblings, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2021-05-10  7:23 UTC (permalink / raw)
  To: linux-erofs, Li Guifu; +Cc: Gao Xiang

Sync up with kernel commit
54e0b6c873dc ("erofs: reserve physical_clusterbits[]")

Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 include/erofs/internal.h |  1 -
 lib/zmap.c               | 13 -------------
 2 files changed, 14 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 1339341a0792..da7be569d8ee 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -164,7 +164,6 @@ struct erofs_inode {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
 			uint8_t  z_logical_clusterbits;
-			uint8_t  z_physical_clusterbits[2];
 		};
 	};
 #ifdef WITH_ANDROID
diff --git a/lib/zmap.c b/lib/zmap.c
index e2a54b937b7c..0c5c4f52bbd0 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -20,8 +20,6 @@ int z_erofs_fill_inode(struct erofs_inode *vi)
 		vi->z_algorithmtype[0] = 0;
 		vi->z_algorithmtype[1] = 0;
 		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
-		vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
-		vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
 
 		vi->flags |= EROFS_I_Z_INITED;
 	}
@@ -66,17 +64,6 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi)
 			  vi->nid * 1ULL);
 		return -EFSCORRUPTED;
 	}
-	vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits +
-					((h->h_clusterbits >> 3) & 3);
-
-	if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) {
-		erofs_err("unsupported physical clusterbits %u for nid %llu",
-			  vi->z_physical_clusterbits[0], (unsigned long long)vi->nid);
-		return -EOPNOTSUPP;
-	}
-
-	vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
-					((h->h_clusterbits >> 5) & 7);
 	vi->flags |= EROFS_I_Z_INITED;
 	return 0;
 }
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] erofs-utils: prepare for per-(sub)file compress strategies
  2021-05-10  7:22 [PATCH 0/4] erofs-utils: prepare for per-(sub)file compression strategies Gao Xiang
  2021-05-10  7:23 ` [PATCH 1/4] erofs-utils: compress trailing data for big pcluster properly Gao Xiang
  2021-05-10  7:23 ` [PATCH 2/4] erofs-utils: reserve physical_clusterbits[] Gao Xiang
@ 2021-05-10  7:23 ` Gao Xiang
  2021-05-10  7:23 ` [PATCH 4/4] erofs-utils: sync up z_erofs_get_extent_compressedlen() Gao Xiang
  3 siblings, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2021-05-10  7:23 UTC (permalink / raw)
  To: linux-erofs, Li Guifu; +Cc: Gao Xiang

In order to adjust pclustersize on a per-(sub)file basis,
per-file map headers need to be generated instead.

In addition to that, we could use COMPACT_4B on a per-file
basis as well after this patch.

Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 lib/compress.c | 80 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 27 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index e146416890f0..89f87dc4096c 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -22,7 +22,7 @@
 static struct erofs_compress compresshandle;
 static int compressionlevel;
 
-static struct z_erofs_map_header mapheader;
+static unsigned int algorithmtype[2];
 
 struct z_erofs_vle_compress_ctx {
 	u8 *metacur;
@@ -149,12 +149,17 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	return count;
 }
 
+/* TODO: apply per-(sub)file strategies here */
+static unsigned int z_erofs_get_max_pclustersize(struct erofs_inode *inode)
+{
+	return cfg.c_physical_clusterblks * EROFS_BLKSIZ;
+}
+
 static int vle_compress_one(struct erofs_inode *inode,
 			    struct z_erofs_vle_compress_ctx *ctx,
 			    bool final)
 {
-	const unsigned int pclusterblks = cfg.c_physical_clusterblks;
-	const unsigned int pclustersize = pclusterblks * EROFS_BLKSIZ;
+	const unsigned int pclustersize = z_erofs_get_max_pclustersize(inode);
 	struct erofs_compress *const h = &compresshandle;
 	unsigned int len = ctx->tail - ctx->head;
 	unsigned int count;
@@ -342,13 +347,14 @@ static void *write_compacted_indexes(u8 *out,
 int z_erofs_convert_to_compacted_format(struct erofs_inode *inode,
 					erofs_blk_t blkaddr,
 					unsigned int legacymetasize,
-					unsigned int logical_clusterbits)
+					void *compressmeta)
 {
 	const unsigned int mpos = Z_EROFS_VLE_EXTENT_ALIGN(inode->inode_isize +
 							   inode->xattr_isize) +
 				  sizeof(struct z_erofs_map_header);
 	const unsigned int totalidx = (legacymetasize -
 				       Z_EROFS_LEGACY_MAP_HEADER_SIZE) / 8;
+	const unsigned int logical_clusterbits = inode->z_logical_clusterbits;
 	u8 *out, *in;
 	struct z_erofs_compressindex_vec cv[16];
 	/* # of 8-byte units so that it can be aligned with 32 bytes */
@@ -379,9 +385,9 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode,
 		compacted_4b_end = totalidx;
 	}
 
-	out = in = inode->compressmeta;
+	out = in = compressmeta;
 
-	out += sizeof(mapheader);
+	out += sizeof(struct z_erofs_map_header);
 	in += Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 
 	dummy_head = false;
@@ -428,11 +434,26 @@ int z_erofs_convert_to_compacted_format(struct erofs_inode *inode,
 					      4, logical_clusterbits, true,
 					      &dummy_head);
 	}
-	inode->extent_isize = out - (u8 *)inode->compressmeta;
-	inode->datalayout = EROFS_INODE_FLAT_COMPRESSION;
+	inode->extent_isize = out - (u8 *)compressmeta;
 	return 0;
 }
 
+static void z_erofs_write_mapheader(struct erofs_inode *inode,
+				    void *compressmeta)
+{
+	struct z_erofs_map_header h = {
+		.h_advise = cpu_to_le16(inode->z_advise),
+		.h_algorithmtype = inode->z_algorithmtype[1] << 4 |
+				   inode->z_algorithmtype[0],
+		/* lclustersize */
+		.h_clusterbits = inode->z_logical_clusterbits - 12,
+	};
+
+	memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
+	/* write out map header */
+	memcpy(compressmeta, &h, sizeof(struct z_erofs_map_header));
+}
+
 int erofs_write_compressed_file(struct erofs_inode *inode)
 {
 	struct erofs_buffer_head *bh;
@@ -459,9 +480,25 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		goto err_close;
 	}
 
-	memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
-	/* write out compressed header */
-	memcpy(compressmeta, &mapheader, sizeof(mapheader));
+	/* initialize per-file compression setting */
+	inode->z_advise = 0;
+	if (!cfg.c_legacy_compress) {
+		inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
+		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION;
+	} else {
+		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+	}
+
+	if (cfg.c_physical_clusterblks > 1) {
+		inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
+			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+	}
+	inode->z_algorithmtype[0] = algorithmtype[0];
+	inode->z_algorithmtype[1] = algorithmtype[1];
+	inode->z_logical_clusterbits = LOG_BLOCK_SIZE;
+
+	z_erofs_write_mapheader(inode, compressmeta);
 
 	blkaddr = erofs_mapbh(bh->block);	/* start_blkaddr */
 	ctx.blkaddr = blkaddr;
@@ -516,19 +553,19 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	 *       when both mkfs & kernel support compression inline.
 	 */
 	erofs_bdrop(bh, false);
-	inode->compressmeta = compressmeta;
 	inode->idata_size = 0;
 	inode->u.i_blocks = compressed_blocks;
 
 	legacymetasize = ctx.metacur - compressmeta;
-	if (cfg.c_legacy_compress) {
+	if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
 		inode->extent_isize = legacymetasize;
-		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
 	} else {
 		ret = z_erofs_convert_to_compacted_format(inode, blkaddr,
-							  legacymetasize, 12);
+							  legacymetasize,
+							  compressmeta);
 		DBG_BUGON(ret);
 	}
+	inode->compressmeta = compressmeta;
 	return 0;
 
 err_bdrop:
@@ -580,7 +617,6 @@ int z_erofs_build_compr_cfgs(struct erofs_buffer_head *sb_bh)
 
 int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 {
-	unsigned int algorithmtype[2];
 	/* initialize for primary compression algorithm */
 	int ret = erofs_compressor_init(&compresshandle,
 					cfg.c_compr_alg_master);
@@ -603,16 +639,13 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 		compresshandle.alg->default_level :
 		cfg.c_compr_level_master;
 
-	/* figure out mapheader */
+	/* figure out primary algorithm */
 	ret = erofs_get_compress_algorithm_id(cfg.c_compr_alg_master);
 	if (ret < 0)
 		return ret;
 
 	algorithmtype[0] = ret;	/* primary algorithm (head 0) */
 	algorithmtype[1] = 0;	/* secondary algorithm (head 1) */
-	mapheader.h_advise = 0;
-	if (!cfg.c_legacy_compress)
-		mapheader.h_advise |= Z_EROFS_ADVISE_COMPACTED_2B;
 	/*
 	 * if big pcluster is enabled, an extra CBLKCNT lcluster index needs
 	 * to be loaded in order to get those compressed block counts.
@@ -625,15 +658,8 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 			return -EINVAL;
 		}
 		erofs_sb_set_big_pcluster();
-		mapheader.h_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1;
-		if (!cfg.c_legacy_compress)
-			mapheader.h_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
-
 		erofs_warn("EXPERIMENTAL big pcluster feature in use. Use at your own risk!");
 	}
-	mapheader.h_algorithmtype = algorithmtype[1] << 4 |
-					  algorithmtype[0];
-	mapheader.h_clusterbits = LOG_BLOCK_SIZE - 12;
 
 	if (erofs_sb_has_compr_cfgs()) {
 		sbi.available_compr_algs |= 1 << ret;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] erofs-utils: sync up z_erofs_get_extent_compressedlen()
  2021-05-10  7:22 [PATCH 0/4] erofs-utils: prepare for per-(sub)file compression strategies Gao Xiang
                   ` (2 preceding siblings ...)
  2021-05-10  7:23 ` [PATCH 3/4] erofs-utils: prepare for per-(sub)file compress strategies Gao Xiang
@ 2021-05-10  7:23 ` Gao Xiang
  3 siblings, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2021-05-10  7:23 UTC (permalink / raw)
  To: linux-erofs, Li Guifu; +Cc: Gao Xiang

Sync up with the in-kernel z_erofs_get_extent_compressedlen(), mainly to
fix the handling of 1 lcluster-sized pclusters for big pcluster.

Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 lib/zmap.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/lib/zmap.c b/lib/zmap.c
index 0c5c4f52bbd0..1084faa6e489 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -386,16 +386,13 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
 
 	DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
 		  m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
-	if (!((map->m_flags & EROFS_MAP_ZIPPED) &&
-	      (vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1))) {
+	if (!(map->m_flags & EROFS_MAP_ZIPPED) ||
+	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
 		map->m_plen = 1 << lclusterbits;
 		return 0;
 	}
 
 	lcn = m->lcn + 1;
-	if (lcn == initial_lcn && !m->compressedlcs)
-		m->compressedlcs = 2;
-
 	if (m->compressedlcs)
 		goto out;
 
@@ -403,21 +400,46 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
 	if (err)
 		return err;
 
+	/*
+	 * If the 1st NONHEAD lcluster has already been handled initially w/o
+	 * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+	 * an internal implemenatation error is detected.
+	 *
+	 * The following code can also handle it properly anyway, but let's
+	 * BUG_ON in the debugging mode only for developers to notice that.
+	 */
+	DBG_BUGON(lcn == initial_lcn &&
+		  m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
 	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		/*
+		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+		 */
+		m->compressedlcs = 1;
+		break;
 	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
-		DBG_BUGON(m->delta[0] != 1);
-		if (m->compressedlcs) {
+		if (m->delta[0] != 1)
+			goto err_bonus_cblkcnt;
+		if (m->compressedlcs)
 			break;
-		}
+		/* fallthrough */
 	default:
 		erofs_err("cannot found CBLKCNT @ lcn %lu of nid %llu",
-			  lcn, (unsigned long long)vi->nid);
+			  lcn, vi->nid | 0ULL);
 		DBG_BUGON(1);
 		return -EFSCORRUPTED;
 	}
 out:
 	map->m_plen = m->compressedlcs << lclusterbits;
 	return 0;
+err_bonus_cblkcnt:
+	erofs_err("bogus CBLKCNT @ lcn %lu of nid %llu",
+		  lcn, vi->nid | 0ULL);
+	DBG_BUGON(1);
+	return -EFSCORRUPTED;
 }
 
 int z_erofs_map_blocks_iter(struct erofs_inode *vi,
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-05-10  7:23 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-10  7:22 [PATCH 0/4] erofs-utils: prepare for per-(sub)file compression strategies Gao Xiang
2021-05-10  7:23 ` [PATCH 1/4] erofs-utils: compress trailing data for big pcluster properly Gao Xiang
2021-05-10  7:23 ` [PATCH 2/4] erofs-utils: reserve physical_clusterbits[] Gao Xiang
2021-05-10  7:23 ` [PATCH 3/4] erofs-utils: prepare for per-(sub)file compress strategies Gao Xiang
2021-05-10  7:23 ` [PATCH 4/4] erofs-utils: sync up z_erofs_get_extent_compressedlen() Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).