linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/8] erofs-utils: collection for fragments and dedupe
@ 2022-09-26 15:25 Gao Xiang
  2022-09-26 15:25 ` [PATCH 1/8] erofs-utils: fuse: support interlaced uncompressed pcluster Gao Xiang
                   ` (7 more replies)
  0 siblings, 8 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

This is a collection patchset to resolve conflicts for the following
features:
https://lore.kernel.org/r/cover.1663065968.git.huyue2@coolpad.com (fragments)
and
https://lore.kernel.org/r/20220909045821.104499-1-ZiyangZhang@linux.alibaba.com (dedupe)

Except that the fragment feature still has some TODOs. For example,
duplicated fragment data can still exist in the packed file and
needs to be optimized to avoid fragment data duplication. However
such improvement is purely a userspace strategy improvement and I
think Yue Hu will continue working on this.

I've tested fragments + dedupe with the following testcase:

dataset: Linux 5.10 + Linux 5.10.87 source code
pcluster size: 4k

compressed vanilla		658845696
fragment			634245120
dedupe				488689664
dedupe + fragment               425848832

Gao Xiang (2):
  erofs-utils: introduce z_erofs_inmem_extent
  erofs-utils: fuse: introduce partial-referenced pclusters

Yue Hu (4):
  erofs-utils: fuse: support interlaced uncompressed pcluster
  erofs-utils: lib: support fragments data decompression
  erofs-utils: mkfs: support interlaced uncompressed data layout
  erofs-utils: mkfs: support fragments

Ziyang Zhang (2):
  erofs-utils: lib: add rb-tree implementation
  erofs-utils: mkfs: introduce global compressed data deduplication

 include/erofs/compress.h   |  11 +-
 include/erofs/config.h     |   4 +-
 include/erofs/decompress.h |   3 +
 include/erofs/dedupe.h     |  39 +++
 include/erofs/fragments.h  |  28 ++
 include/erofs/inode.h      |   1 +
 include/erofs/internal.h   |  14 +
 include/erofs_fs.h         |  33 ++-
 lib/Makefile.am            |   5 +-
 lib/compress.c             | 296 +++++++++++++++------
 lib/data.c                 |  29 ++-
 lib/decompress.c           |  19 +-
 lib/dedupe.c               | 176 +++++++++++++
 lib/fragments.c            |  65 +++++
 lib/inode.c                |  58 ++++-
 lib/rb_tree.c              | 512 +++++++++++++++++++++++++++++++++++++
 lib/rb_tree.h              | 104 ++++++++
 lib/rolling_hash.h         |  60 +++++
 lib/super.c                |   1 +
 lib/zmap.c                 |  59 ++++-
 mkfs/main.c                |  77 +++++-
 21 files changed, 1492 insertions(+), 102 deletions(-)
 create mode 100644 include/erofs/dedupe.h
 create mode 100644 include/erofs/fragments.h
 create mode 100644 lib/dedupe.c
 create mode 100644 lib/fragments.c
 create mode 100644 lib/rb_tree.c
 create mode 100644 lib/rb_tree.h
 create mode 100644 lib/rolling_hash.h

-- 
2.24.4


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/8] erofs-utils: fuse: support interlaced uncompressed pcluster
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  2022-09-26 15:25 ` [PATCH 2/8] erofs-utils: lib: support fragments data decompression Gao Xiang
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

From: Yue Hu <huyue2@coolpad.com>

Support uncompressed data layout with on-disk interlaced offset in
compression mode for erofsfuse.

Signed-off-by: Yue Hu <huyue2@coolpad.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/decompress.h |  3 +++
 include/erofs/internal.h   |  1 +
 include/erofs_fs.h         |  2 ++
 lib/data.c                 |  7 ++++++-
 lib/decompress.c           | 19 ++++++++++++++++---
 lib/zmap.c                 | 12 +++++++++---
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/include/erofs/decompress.h b/include/erofs/decompress.h
index 82bf7b8..a9067cb 100644
--- a/include/erofs/decompress.h
+++ b/include/erofs/decompress.h
@@ -23,6 +23,9 @@ struct z_erofs_decompress_req {
 	unsigned int decodedskip;
 	unsigned int inputsize, decodedlength;
 
+	/* cut point of interlaced uncompressed data */
+	unsigned int interlaced_offset;
+
 	/* indicate the algorithm will be used for decompression */
 	unsigned int alg;
 	bool partial_decoding;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 2e0aae8..a4a1fe3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -311,6 +311,7 @@ struct erofs_map_blocks {
 
 enum {
 	Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+	Z_EROFS_COMPRESSION_INTERLACED,
 	Z_EROFS_COMPRESSION_RUNTIME_MAX
 };
 
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 08f9761..b8a7421 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -294,11 +294,13 @@ struct z_erofs_lzma_cfgs {
  * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
  * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
  * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
+ * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
  */
 #define Z_EROFS_ADVISE_COMPACTED_2B		0x0001
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1		0x0002
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2		0x0004
 #define Z_EROFS_ADVISE_INLINE_PCLUSTER		0x0008
+#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER	0x0010
 
 struct z_erofs_map_header {
 	__le16	h_reserved1;
diff --git a/lib/data.c b/lib/data.c
index ad7b2cb..af52fde 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -226,7 +226,7 @@ static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
 	};
 	struct erofs_map_dev mdev;
 	bool partial;
-	unsigned int bufsize = 0;
+	unsigned int bufsize = 0, interlaced_offset;
 	char *raw = NULL;
 	int ret = 0;
 
@@ -287,10 +287,15 @@ static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
 		if (ret < 0)
 			break;
 
+		interlaced_offset = 0;
+		if (map.m_algorithmformat == Z_EROFS_COMPRESSION_INTERLACED)
+			interlaced_offset = erofs_blkoff(map.m_la);
+
 		ret = z_erofs_decompress(&(struct z_erofs_decompress_req) {
 					.in = raw,
 					.out = buffer + end - offset,
 					.decodedskip = skip,
+					.interlaced_offset = interlaced_offset,
 					.inputsize = map.m_plen,
 					.decodedlength = length,
 					.alg = map.m_algorithmformat,
diff --git a/lib/decompress.c b/lib/decompress.c
index 1661f91..14e91b2 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -131,13 +131,26 @@ out:
 
 int z_erofs_decompress(struct z_erofs_decompress_req *rq)
 {
-	if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
-		if (rq->inputsize > EROFS_BLKSIZ)
-			return -EFSCORRUPTED;
+	if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) {
+		unsigned int count, rightpart, skip;
+
+		/* XXX: should support inputsize >= EROFS_BLKSIZ later */
+                if (rq->inputsize > EROFS_BLKSIZ)
+                        return -EFSCORRUPTED;
 
 		DBG_BUGON(rq->decodedlength > EROFS_BLKSIZ);
 		DBG_BUGON(rq->decodedlength < rq->decodedskip);
+		count = rq->decodedlength - rq->decodedskip;
+		skip = erofs_blkoff(rq->interlaced_offset + rq->decodedskip);
+		rightpart = min(EROFS_BLKSIZ - skip, count);
+		memcpy(rq->out, rq->in + skip, rightpart);
+		memcpy(rq->out + rightpart, rq->in, count - rightpart);
+		return 0;
+	} else if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
+		if (rq->decodedlength > rq->inputsize)
+			return -EFSCORRUPTED;
 
+		DBG_BUGON(rq->decodedlength < rq->decodedskip);
 		memcpy(rq->out, rq->in + rq->decodedskip,
 		       rq->decodedlength - rq->decodedskip);
 		return 0;
diff --git a/lib/zmap.c b/lib/zmap.c
index abe0d31..806d608 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -616,10 +616,16 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi,
 			goto out;
 	}
 
-	if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
-		map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
-	else
+	if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) {
+		if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
+			map->m_algorithmformat =
+				Z_EROFS_COMPRESSION_INTERLACED;
+		else
+			map->m_algorithmformat =
+				Z_EROFS_COMPRESSION_SHIFTED;
+	} else {
 		map->m_algorithmformat = vi->z_algorithmtype[0];
+	}
 
 	if (flags & EROFS_GET_BLOCKS_FIEMAP) {
 		err = z_erofs_get_extent_decompressedlen(&m);
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/8] erofs-utils: lib: support fragments data decompression
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
  2022-09-26 15:25 ` [PATCH 1/8] erofs-utils: fuse: support interlaced uncompressed pcluster Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  2022-09-30 13:08   ` [PATCH] " Yue Hu
  2022-09-26 15:25 ` [PATCH 3/8] erofs-utils: introduce z_erofs_inmem_extent Gao Xiang
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

From: Yue Hu <huyue2@coolpad.com>

Add compressed fragments support for erofsfuse.

Signed-off-by: Yue Hu <huyue2@coolpad.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/internal.h |  6 ++++++
 include/erofs_fs.h       | 26 +++++++++++++++++++------
 lib/data.c               | 19 +++++++++++++++++++
 lib/super.c              |  1 +
 lib/zmap.c               | 41 +++++++++++++++++++++++++++++++++++++++-
 5 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index a4a1fe3..dd3776c 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -102,6 +102,7 @@ struct erofs_sb_info {
 		u16 devt_slotoff;		/* used for mkfs */
 		u16 device_id_mask;		/* used for others */
 	};
+	erofs_nid_t packed_nid;
 };
 
 /* global sbi */
@@ -132,6 +133,7 @@ EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
 EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
 EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
 EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
+EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
 
 #define EROFS_I_EA_INITED	(1 << 0)
@@ -209,6 +211,7 @@ struct erofs_inode {
 #ifdef WITH_ANDROID
 	uint64_t capabilities;
 #endif
+	erofs_off_t fragmentoff;
 };
 
 static inline bool is_inode_layout_compression(struct erofs_inode *inode)
@@ -279,6 +282,7 @@ enum {
 	BH_Mapped,
 	BH_Encoded,
 	BH_FullMapped,
+	BH_Fragment,
 };
 
 /* Has a disk mapping */
@@ -289,6 +293,8 @@ enum {
 #define EROFS_MAP_ENCODED	(1 << BH_Encoded)
 /* The length of extent is full */
 #define EROFS_MAP_FULL_MAPPED	(1 << BH_FullMapped)
+/* Located in the special packed inode */
+#define EROFS_MAP_FRAGMENT	(1 << BH_Fragment)
 
 struct erofs_map_blocks {
 	char mpage[EROFS_BLKSIZ];
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index b8a7421..e492ad9 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -25,13 +25,15 @@
 #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE	0x00000004
 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE	0x00000008
 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING	0x00000010
+#define EROFS_FEATURE_INCOMPAT_FRAGMENTS	0x00000020
 #define EROFS_ALL_FEATURE_INCOMPAT		\
 	(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
 	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
 	 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
 	 EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
-	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
+	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
+	 EROFS_FEATURE_INCOMPAT_FRAGMENTS)
 
 #define EROFS_SB_EXTSLOT_SIZE	16
 
@@ -73,7 +75,9 @@ struct erofs_super_block {
 	} __packed u1;
 	__le16 extra_devices;	/* # of devices besides the primary device */
 	__le16 devt_slotoff;	/* startoff = devt_slotoff * devt_slotsize */
-	__u8 reserved2[38];
+	__u8 reserved[6];
+	__le64 packed_nid;	/* nid of the special packed inode */
+	__u8 reserved2[24];
 };
 
 /*
@@ -295,17 +299,26 @@ struct z_erofs_lzma_cfgs {
  * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
  * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
  * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
+ * bit 5 : fragment pcluster (0 - off; 1 - on)
  */
 #define Z_EROFS_ADVISE_COMPACTED_2B		0x0001
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1		0x0002
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2		0x0004
 #define Z_EROFS_ADVISE_INLINE_PCLUSTER		0x0008
 #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER	0x0010
+#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER	0x0020
 
+#define Z_EROFS_FRAGMENT_INODE_BIT		7
 struct z_erofs_map_header {
-	__le16	h_reserved1;
-	/* record the size of tailpacking data */
-	__le16  h_idata_size;
+	union {
+		/* direct addressing for fragment offset */
+		__le32	h_fragmentoff;
+		struct {
+			__le16  h_reserved1;
+			/* record the size of tailpacking data */
+			__le16	h_idata_size;
+		};
+	};
 	__le16	h_advise;
 	/*
 	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
@@ -314,7 +327,8 @@ struct z_erofs_map_header {
 	__u8	h_algorithmtype;
 	/*
 	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
-	 * bit 3-7 : reserved.
+	 * bit 3-6 : reserved;
+	 * bit 7   : move the whole file into packed inode or not.
 	 */
 	__u8	h_clusterbits;
 };
diff --git a/lib/data.c b/lib/data.c
index af52fde..bcb0f7e 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -275,6 +275,25 @@ static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
 			continue;
 		}
 
+		if (map.m_flags & EROFS_MAP_FRAGMENT) {
+			struct erofs_inode packed_inode = {
+				.nid = sbi.packed_nid,
+			};
+
+			ret = erofs_read_inode_from_disk(&packed_inode);
+			if (ret) {
+				erofs_err("failed to read packed inode from disk");
+				return ret;
+			}
+
+			ret = z_erofs_read_data(&packed_inode,
+					buffer + end - offset, length - skip,
+					inode->fragmentoff + skip);
+			if (ret < 0)
+				break;
+			continue;
+		}
+
 		if (map.m_plen > bufsize) {
 			bufsize = map.m_plen;
 			raw = realloc(raw, bufsize);
diff --git a/lib/super.c b/lib/super.c
index b267412..30aeb36 100644
--- a/lib/super.c
+++ b/lib/super.c
@@ -104,6 +104,7 @@ int erofs_read_superblock(void)
 	sbi.xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
 	sbi.islotbits = EROFS_ISLOTBITS;
 	sbi.root_nid = le16_to_cpu(dsb->root_nid);
+	sbi.packed_nid = le64_to_cpu(dsb->packed_nid);
 	sbi.inos = le64_to_cpu(dsb->inos);
 	sbi.checksum = le32_to_cpu(dsb->checksum);
 
diff --git a/lib/zmap.c b/lib/zmap.c
index 806d608..2c2ba01 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -49,6 +49,17 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi)
 		return -EIO;
 
 	h = (struct z_erofs_map_header *)buf;
+	/*
+	 * if the highest bit of the 8-byte map header is set, the whole file
+	 * is stored in the packed inode. The rest bits keeps z_fragmentoff.
+	 */
+	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
+		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+		vi->fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
+		vi->z_tailextent_headlcn = 0;
+		goto out;
+	}
+
 	vi->z_advise = le16_to_cpu(h->h_advise);
 	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
 	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
@@ -83,6 +94,17 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi)
 		if (ret < 0)
 			return ret;
 	}
+	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
+	    !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
+		struct erofs_map_blocks map = { .index = UINT_MAX };
+
+		vi->fragmentoff = le32_to_cpu(h->h_fragmentoff);
+		ret = z_erofs_do_map_blocks(vi, &map,
+					    EROFS_GET_BLOCKS_FINDTAIL);
+		if (ret < 0)
+			return ret;
+	}
+out:
 	vi->flags |= EROFS_I_Z_INITED;
 	return 0;
 }
@@ -546,6 +568,7 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi,
 				 int flags)
 {
 	bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
+	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
 	struct z_erofs_maprecorder m = {
 		.inode = vi,
 		.map = map,
@@ -603,12 +626,19 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi,
 	}
 
 	map->m_llen = end - map->m_la;
-	if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
 		vi->z_tailextent_headlcn = m.lcn;
+		/* for non-compact indexes, fragmentoff is 64 bits */
+		if (fragment &&
+		    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+			vi->fragmentoff |= (u64)m.pblk << 32;
+	}
 	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
 		map->m_flags |= EROFS_MAP_META;
 		map->m_pa = vi->z_idataoff;
 		map->m_plen = vi->z_idata_size;
+	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
+		map->m_flags |= EROFS_MAP_FRAGMENT;
 	} else {
 		map->m_pa = blknr_to_addr(m.pblk);
 		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -658,6 +688,15 @@ int z_erofs_map_blocks_iter(struct erofs_inode *vi,
 	if (err)
 		goto out;
 
+	if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
+	    !vi->z_tailextent_headlcn) {
+		map->m_la = 0;
+		map->m_llen = vi->i_size;
+		map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
+				EROFS_MAP_FRAGMENT;
+		goto out;
+	}
+
 	err = z_erofs_do_map_blocks(vi, map, flags);
 out:
 	DBG_BUGON(err < 0 && err != -ENOMEM);
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/8] erofs-utils: introduce z_erofs_inmem_extent
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
  2022-09-26 15:25 ` [PATCH 1/8] erofs-utils: fuse: support interlaced uncompressed pcluster Gao Xiang
  2022-09-26 15:25 ` [PATCH 2/8] erofs-utils: lib: support fragments data decompression Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  2022-09-26 15:25 ` [PATCH 4/8] erofs-utils: mkfs: support interlaced uncompressed data layout Gao Xiang
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

In order to introduce deduplicatation for compressed pclusters.
A lookahead extent is recorded so that the following deduplication
process can adjust the previous extent on demand.

Also, in the future, it can be used for parallel compression
as well.

Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/compress.c | 87 ++++++++++++++++++++++++++++----------------------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index fd02053..3c1d9db 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -22,12 +22,19 @@
 static struct erofs_compress compresshandle;
 static unsigned int algorithmtype[2];
 
-struct z_erofs_vle_compress_ctx {
-	u8 *metacur;
+struct z_erofs_inmem_extent {
+	erofs_blk_t blkaddr;
+	unsigned int compressedblks;
+	unsigned int length;
+	bool raw;
+};
 
+struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
+	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
+
+	u8 *metacur;
 	unsigned int head, tail;
-	unsigned int compressedblks;
 	erofs_blk_t blkaddr;		/* pointing to the next blkaddr */
 	u16 clusterofs;
 };
@@ -43,7 +50,7 @@ static unsigned int vle_compressmeta_capacity(erofs_off_t filesize)
 	return Z_EROFS_LEGACY_MAP_HEADER_SIZE + indexsize;
 }
 
-static void vle_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
+static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
 {
 	const unsigned int type = Z_EROFS_VLE_CLUSTER_TYPE_PLAIN;
 	struct z_erofs_vle_decompressed_index di;
@@ -59,10 +66,10 @@ static void vle_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
 	ctx->metacur += sizeof(di);
 }
 
-static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
-			      unsigned int count, bool raw)
+static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 {
 	unsigned int clusterofs = ctx->clusterofs;
+	unsigned int count = ctx->e.length;
 	unsigned int d0 = 0, d1 = (clusterofs + count) / EROFS_BLKSIZ;
 	struct z_erofs_vle_decompressed_index di;
 	unsigned int type;
@@ -76,13 +83,13 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 		 * A lcluster cannot have three parts with the middle one which
 		 * is well-compressed for !ztailpacking cases.
 		 */
-		DBG_BUGON(!raw && !cfg.c_ztailpacking);
-		type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
+		DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking);
+		type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
 			Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
 		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
 
 		di.di_advise = advise;
-		di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+		di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
@@ -95,7 +102,7 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 		/* XXX: big pcluster feature should be per-inode */
 		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
-			di.di_u.delta[0] = cpu_to_le16(ctx->compressedblks |
+			di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
 					Z_EROFS_VLE_DI_D0_CBLKCNT);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else if (d0) {
@@ -119,9 +126,9 @@ static void vle_write_indexes(struct z_erofs_vle_compress_ctx *ctx,
 				di.di_u.delta[0] = cpu_to_le16(d0);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else {
-			type = raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
+			type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
 				Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
-			di.di_u.blkaddr = cpu_to_le32(ctx->blkaddr);
+			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
 		}
 		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
 		di.di_advise = advise;
@@ -226,18 +233,16 @@ static int vle_compress_one(struct erofs_inode *inode,
 			    struct z_erofs_vle_compress_ctx *ctx,
 			    bool final)
 {
+	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
+	char *const dst = dstbuf + EROFS_BLKSIZ;
 	struct erofs_compress *const h = &compresshandle;
 	unsigned int len = ctx->tail - ctx->head;
-	unsigned int count;
 	int ret;
-	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
-	char *const dst = dstbuf + EROFS_BLKSIZ;
 
 	while (len) {
 		unsigned int pclustersize =
 			z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ;
 		bool may_inline = (cfg.c_ztailpacking && final);
-		bool raw;
 
 		if (len <= pclustersize) {
 			if (!final)
@@ -246,10 +251,11 @@ static int vle_compress_one(struct erofs_inode *inode,
 				goto nocompression;
 		}
 
-		count = min(len, cfg.c_max_decompressed_extent_bytes);
+		ctx->e.length = min(len,
+				cfg.c_max_decompressed_extent_bytes);
 		ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
-					      &count, dst, pclustersize,
-					      !(final && len == count));
+				&ctx->e.length, dst, pclustersize,
+				!(final && len == ctx->e.length));
 		if (ret <= 0) {
 			if (ret != -EAGAIN) {
 				erofs_err("failed to compress %s: %s",
@@ -267,17 +273,17 @@ nocompression:
 
 			if (ret < 0)
 				return ret;
-			count = ret;
+			ctx->e.length = ret;
 
 			/*
 			 * XXX: For now, we have to leave `ctx->compressedblks
 			 * = 1' since there is no way to generate compressed
 			 * indexes after the time that ztailpacking is decided.
 			 */
-			ctx->compressedblks = 1;
-			raw = true;
+			ctx->e.compressedblks = 1;
+			ctx->e.raw = true;
 		/* tailpcluster should be less than 1 block */
-		} else if (may_inline && len == count &&
+		} else if (may_inline && len == ctx->e.length &&
 			   ret < EROFS_BLKSIZ) {
 			if (ctx->clusterofs + len <= EROFS_BLKSIZ) {
 				inode->eof_tailraw = malloc(len);
@@ -292,19 +298,20 @@ nocompression:
 			ret = z_erofs_fill_inline_data(inode, dst, ret, false);
 			if (ret < 0)
 				return ret;
-			ctx->compressedblks = 1;
-			raw = false;
+			ctx->e.compressedblks = 1;
+			ctx->e.raw = false;
 		} else {
 			unsigned int tailused, padding;
 
-			if (may_inline && len == count)
+			if (may_inline && len == ctx->e.length)
 				tryrecompress_trailing(ctx->queue + ctx->head,
-						       &count, dst, &ret);
+						&ctx->e.length, dst, &ret);
 
 			tailused = ret & (EROFS_BLKSIZ - 1);
 			padding = 0;
-			ctx->compressedblks = BLK_ROUND_UP(ret);
-			DBG_BUGON(ctx->compressedblks * EROFS_BLKSIZ >= count);
+			ctx->e.compressedblks = BLK_ROUND_UP(ret);
+			DBG_BUGON(ctx->e.compressedblks * EROFS_BLKSIZ >=
+				  ctx->e.length);
 
 			/* zero out garbage trailing data for non-0padding */
 			if (!erofs_sb_has_lz4_0padding())
@@ -315,21 +322,22 @@ nocompression:
 
 			/* write compressed data */
 			erofs_dbg("Writing %u compressed data to %u of %u blocks",
-				  count, ctx->blkaddr, ctx->compressedblks);
+				  ctx->e.length, ctx->blkaddr,
+				  ctx->e.compressedblks);
 
 			ret = blk_write(dst - padding, ctx->blkaddr,
-					ctx->compressedblks);
+					ctx->e.compressedblks);
 			if (ret)
 				return ret;
-			raw = false;
+			ctx->e.raw = false;
 		}
+		/* write indexes for this pcluster */
+		ctx->e.blkaddr = ctx->blkaddr;
+		z_erofs_write_indexes(ctx);
 
-		ctx->head += count;
-		/* write compression indexes for this pcluster */
-		vle_write_indexes(ctx, count, raw);
-
-		ctx->blkaddr += ctx->compressedblks;
-		len -= count;
+		ctx->blkaddr += ctx->e.compressedblks;
+		ctx->head += ctx->e.length;
+		len -= ctx->e.length;
 
 		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
 			const unsigned int qh_aligned =
@@ -654,6 +662,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
+	ctx.e.length = 0;
 	remaining = inode->i_size;
 
 	while (remaining) {
@@ -679,7 +688,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	DBG_BUGON(compressed_blocks < !!inode->idata_size);
 	compressed_blocks -= !!inode->idata_size;
 
-	vle_write_indexes_final(&ctx);
+	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
 	if (compressed_blocks * EROFS_BLKSIZ + inode->idata_size +
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/8] erofs-utils: mkfs: support interlaced uncompressed data layout
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
                   ` (2 preceding siblings ...)
  2022-09-26 15:25 ` [PATCH 3/8] erofs-utils: introduce z_erofs_inmem_extent Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  2022-09-26 15:25 ` [PATCH 5/8] erofs-utils: mkfs: support fragments Gao Xiang
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

From: Yue Hu <huyue2@coolpad.com>

Interlaced uncompressed data can minimize unnecessary data processing
by using in-place I/O.

However it cannot be used together with deduplication.

Signed-off-by: Yue Hu <huyue2@coolpad.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/compress.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index 3c1d9db..8fa60e2 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -150,7 +150,7 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 				     unsigned int *len, char *dst)
 {
 	int ret;
-	unsigned int count;
+	unsigned int count, interlaced_offset, rightpart;
 
 	/* reset clusterofs to 0 if permitted */
 	if (!erofs_sb_has_lz4_0padding() && ctx->clusterofs &&
@@ -160,11 +160,16 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 		ctx->clusterofs = 0;
 	}
 
-	/* write uncompressed data */
 	count = min(EROFS_BLKSIZ, *len);
 
-	memcpy(dst, ctx->queue + ctx->head, count);
-	memset(dst + count, 0, EROFS_BLKSIZ - count);
+	/* write interlaced uncompressed data if needed */
+	interlaced_offset = 0; /* will set it to clusterofs */
+	rightpart = min(EROFS_BLKSIZ - interlaced_offset, count);
+
+	memset(dst, 0, EROFS_BLKSIZ);
+
+	memcpy(dst + interlaced_offset, ctx->queue + ctx->head, rightpart);
+	memcpy(dst, ctx->queue + ctx->head + rightpart, count - rightpart);
 
 	erofs_dbg("Writing %u uncompressed data to block %u",
 		  count, ctx->blkaddr);
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 5/8] erofs-utils: mkfs: support fragments
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
                   ` (3 preceding siblings ...)
  2022-09-26 15:25 ` [PATCH 4/8] erofs-utils: mkfs: support interlaced uncompressed data layout Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  2022-09-26 15:25 ` [PATCH 6/8] erofs-utils: lib: add rb-tree implementation Gao Xiang
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

From: Yue Hu <huyue2@coolpad.com>

This approach can merge tail pclusters or the whole files into a special
inode in order to achieve greater compression ratios. Also, an option of
pcluster size is provided for different compression requirements.

Enable interlaced uncompressed data layout for compressed files at the
same time as well.

Signed-off-by: Yue Hu <huyue2@coolpad.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/compress.h  |  11 ++++-
 include/erofs/config.h    |   3 +-
 include/erofs/fragments.h |  28 +++++++++++
 include/erofs/inode.h     |   1 +
 include/erofs/internal.h  |   3 ++
 lib/Makefile.am           |   4 +-
 lib/compress.c            | 101 ++++++++++++++++++++++++++++----------
 lib/fragments.c           |  65 ++++++++++++++++++++++++
 lib/inode.c               |  58 ++++++++++++++++++++--
 mkfs/main.c               |  58 +++++++++++++++++++---
 10 files changed, 295 insertions(+), 37 deletions(-)
 create mode 100644 include/erofs/fragments.h
 create mode 100644 lib/fragments.c

diff --git a/include/erofs/compress.h b/include/erofs/compress.h
index 24f6204..e9dfaf2 100644
--- a/include/erofs/compress.h
+++ b/include/erofs/compress.h
@@ -18,13 +18,22 @@ extern "C"
 #define EROFS_CONFIG_COMPR_MIN_SZ           (32   * 1024)
 
 void z_erofs_drop_inline_pcluster(struct erofs_inode *inode);
-int erofs_write_compressed_file(struct erofs_inode *inode);
+int erofs_write_compressed_file(struct erofs_inode *inode, int fd);
 
 int z_erofs_compress_init(struct erofs_buffer_head *bh);
 int z_erofs_compress_exit(void);
 
 const char *z_erofs_list_available_compressors(unsigned int i);
 
+static inline bool erofs_is_packed_inode(struct erofs_inode *inode)
+{
+	if (inode->nid == EROFS_PACKED_NID_UNALLOCATED) {
+		DBG_BUGON(sbi.packed_nid != EROFS_PACKED_NID_UNALLOCATED);
+		return true;
+	}
+	return (sbi.packed_nid > 0 && inode->nid == sbi.packed_nid);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/erofs/config.h b/include/erofs/config.h
index 539d813..764b0f7 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -44,6 +44,7 @@ struct erofs_configure {
 	char c_chunkbits;
 	bool c_noinline_data;
 	bool c_ztailpacking;
+	bool c_fragments;
 	bool c_ignore_mtime;
 	bool c_showprogress;
 
@@ -62,7 +63,7 @@ struct erofs_configure {
 	/* < 0, xattr disabled and INT_MAX, always use inline xattrs */
 	int c_inline_xattr_tolerance;
 
-	u32 c_pclusterblks_max, c_pclusterblks_def;
+	u32 c_pclusterblks_max, c_pclusterblks_def, c_pclusterblks_packed;
 	u32 c_max_decompressed_extent_bytes;
 	u32 c_dict_size;
 	u64 c_unix_timestamp;
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
new file mode 100644
index 0000000..5444384
--- /dev/null
+++ b/include/erofs/fragments.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C), 2022, Coolpad Group Limited.
+ */
+#ifndef __EROFS_FRAGMENTS_H
+#define __EROFS_FRAGMENTS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "erofs/internal.h"
+
+extern const char *frags_packedname;
+#define EROFS_PACKED_INODE	frags_packedname
+
+int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
+			   unsigned int len);
+struct erofs_inode *erofs_mkfs_build_fragments(void);
+int erofs_fragments_init(void);
+void erofs_fragments_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/inode.h b/include/erofs/inode.h
index 79b8d89..bf20cd3 100644
--- a/include/erofs/inode.h
+++ b/include/erofs/inode.h
@@ -22,6 +22,7 @@ unsigned int erofs_iput(struct erofs_inode *inode);
 erofs_nid_t erofs_lookupnid(struct erofs_inode *inode);
 struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
 						    const char *path);
+struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name);
 
 #ifdef __cplusplus
 }
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index dd3776c..6fc58f9 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -70,6 +70,8 @@ struct erofs_device_info {
 	u32 mapped_blkaddr;
 };
 
+#define EROFS_PACKED_NID_UNALLOCATED	-1
+
 struct erofs_sb_info {
 	struct erofs_device_info *devs;
 
@@ -212,6 +214,7 @@ struct erofs_inode {
 	uint64_t capabilities;
 #endif
 	erofs_off_t fragmentoff;
+	unsigned int fragment_size;
 };
 
 static inline bool is_inode_layout_compression(struct erofs_inode *inode)
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 3fad357..95f1d55 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -22,12 +22,14 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/include/erofs/trace.h \
       $(top_srcdir)/include/erofs/xattr.h \
       $(top_srcdir)/include/erofs/compress_hints.h \
+      $(top_srcdir)/include/erofs/fragments.h \
       $(top_srcdir)/lib/liberofs_private.h
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
-		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c
+		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
+		      fragments.c
 liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index 8fa60e2..c0bd307 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -18,6 +18,7 @@
 #include "compressor.h"
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
+#include "erofs/fragments.h"
 
 static struct erofs_compress compresshandle;
 static unsigned int algorithmtype[2];
@@ -33,6 +34,7 @@ struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
 	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
 
+	struct erofs_inode *inode;
 	u8 *metacur;
 	unsigned int head, tail;
 	erofs_blk_t blkaddr;		/* pointing to the next blkaddr */
@@ -68,6 +70,7 @@ static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
 
 static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 {
+	struct erofs_inode *inode = ctx->inode;
 	unsigned int clusterofs = ctx->clusterofs;
 	unsigned int count = ctx->e.length;
 	unsigned int d0 = 0, d1 = (clusterofs + count) / EROFS_BLKSIZ;
@@ -89,7 +92,11 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
 
 		di.di_advise = advise;
-		di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY &&
+		    !ctx->e.compressedblks)
+			di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
+		else
+			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
@@ -128,7 +135,12 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		} else {
 			type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
 				Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
-			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+
+			if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY &&
+			    !ctx->e.compressedblks)
+				di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
+			else
+				di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
 		}
 		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
 		di.di_advise = advise;
@@ -163,7 +175,10 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 	count = min(EROFS_BLKSIZ, *len);
 
 	/* write interlaced uncompressed data if needed */
-	interlaced_offset = 0; /* will set it to clusterofs */
+	if (ctx->inode->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
+		interlaced_offset = ctx->clusterofs;
+	else
+		interlaced_offset = 0;
 	rightpart = min(EROFS_BLKSIZ - interlaced_offset, count);
 
 	memset(dst, 0, EROFS_BLKSIZ);
@@ -181,6 +196,8 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 
 static unsigned int z_erofs_get_max_pclusterblks(struct erofs_inode *inode)
 {
+	if (erofs_is_packed_inode(inode))
+		return cfg.c_pclusterblks_packed;
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
 		return 1 + rand() % cfg.c_pclusterblks_max;
@@ -234,11 +251,11 @@ static void tryrecompress_trailing(void *in, unsigned int *insize,
 	*compressedsize = ret;
 }
 
-static int vle_compress_one(struct erofs_inode *inode,
-			    struct z_erofs_vle_compress_ctx *ctx,
+static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx,
 			    bool final)
 {
 	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_BLKSIZ];
+	struct erofs_inode *inode = ctx->inode;
 	char *const dst = dstbuf + EROFS_BLKSIZ;
 	struct erofs_compress *const h = &compresshandle;
 	unsigned int len = ctx->tail - ctx->head;
@@ -248,10 +265,16 @@ static int vle_compress_one(struct erofs_inode *inode,
 		unsigned int pclustersize =
 			z_erofs_get_max_pclusterblks(inode) * EROFS_BLKSIZ;
 		bool may_inline = (cfg.c_ztailpacking && final);
+		bool may_packing = (cfg.c_fragments && final &&
+				   !erofs_is_packed_inode(inode));
 
 		if (len <= pclustersize) {
 			if (!final)
 				break;
+			if (may_packing) {
+				ctx->e.length = len;
+				goto frag_packing;
+			}
 			if (!may_inline && len <= EROFS_BLKSIZ)
 				goto nocompression;
 		}
@@ -267,7 +290,6 @@ static int vle_compress_one(struct erofs_inode *inode,
 					  inode->i_srcpath,
 					  erofs_strerror(ret));
 			}
-
 			if (may_inline && len < EROFS_BLKSIZ)
 				ret = z_erofs_fill_inline_data(inode,
 						ctx->queue + ctx->head,
@@ -287,6 +309,16 @@ nocompression:
 			 */
 			ctx->e.compressedblks = 1;
 			ctx->e.raw = true;
+		} else if (may_packing && len == ctx->e.length &&
+			   ret < pclustersize) {
+frag_packing:
+			ret = z_erofs_pack_fragments(inode,
+						     ctx->queue + ctx->head,
+						     len);
+			if (ret < 0)
+				return ret;
+			ctx->e.compressedblks = 0; /* indicate a fragment */
+			ctx->e.raw = true;
 		/* tailpcluster should be less than 1 block */
 		} else if (may_inline && len == ctx->e.length &&
 			   ret < EROFS_BLKSIZ) {
@@ -559,13 +591,17 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode,
 {
 	struct z_erofs_map_header h = {
 		.h_advise = cpu_to_le16(inode->z_advise),
-		.h_idata_size = cpu_to_le16(inode->idata_size),
 		.h_algorithmtype = inode->z_algorithmtype[1] << 4 |
 				   inode->z_algorithmtype[0],
 		/* lclustersize */
 		.h_clusterbits = inode->z_logical_clusterbits - 12,
 	};
 
+	if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
+		h.h_fragmentoff = cpu_to_le32(inode->fragmentoff);
+	else
+		h.h_idata_size = cpu_to_le16(inode->idata_size);
+
 	memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE);
 	/* write out map header */
 	memcpy(compressmeta, &h, sizeof(struct z_erofs_map_header));
@@ -618,30 +654,24 @@ void z_erofs_drop_inline_pcluster(struct erofs_inode *inode)
 	inode->eof_tailraw = NULL;
 }
 
-int erofs_write_compressed_file(struct erofs_inode *inode)
+int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 {
 	struct erofs_buffer_head *bh;
 	static struct z_erofs_vle_compress_ctx ctx;
 	erofs_off_t remaining;
 	erofs_blk_t blkaddr, compressed_blocks;
 	unsigned int legacymetasize;
-	int ret, fd;
+	int ret;
 	u8 *compressmeta = malloc(vle_compressmeta_capacity(inode->i_size));
 
 	if (!compressmeta)
 		return -ENOMEM;
 
-	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
-	if (fd < 0) {
-		ret = -errno;
-		goto err_free_meta;
-	}
-
 	/* allocate main data buffer */
 	bh = erofs_balloc(DATA, 0, 0, 0);
 	if (IS_ERR(bh)) {
 		ret = PTR_ERR(bh);
-		goto err_close;
+		goto err_free_meta;
 	}
 
 	/* initialize per-file compression setting */
@@ -658,11 +688,17 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
 	}
+	if (cfg.c_fragments)
+		inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
 	inode->z_algorithmtype[0] = algorithmtype[0];
 	inode->z_algorithmtype[1] = algorithmtype[1];
 	inode->z_logical_clusterbits = LOG_BLOCK_SIZE;
 
+	inode->idata_size = 0;
+	inode->fragment_size = 0;
+
 	blkaddr = erofs_mapbh(bh->block);	/* start_blkaddr */
+	ctx.inode = inode;
 	ctx.blkaddr = blkaddr;
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
@@ -682,7 +718,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		remaining -= readcount;
 		ctx.tail += readcount;
 
-		ret = vle_compress_one(inode, &ctx, !remaining);
+		ret = vle_compress_one(&ctx, !remaining);
 		if (ret)
 			goto err_free_idata;
 	}
@@ -696,29 +732,41 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
-	if (compressed_blocks * EROFS_BLKSIZ + inode->idata_size +
+	if (!inode->fragment_size &&
+	    compressed_blocks * EROFS_BLKSIZ + inode->idata_size +
 	    legacymetasize >= inode->i_size) {
 		ret = -ENOSPC;
 		goto err_free_idata;
 	}
 	z_erofs_write_mapheader(inode, compressmeta);
 
-	close(fd);
+	/* if the entire file is a fragment, a simplified form is used. */
+	if (inode->i_size == inode->fragment_size) {
+		DBG_BUGON(inode->fragmentoff >> 63);
+		*(__le64 *)compressmeta =
+			cpu_to_le64(inode->fragmentoff | 1ULL << 63);
+		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+		legacymetasize = Z_EROFS_LEGACY_MAP_HEADER_SIZE;
+	}
+
 	if (compressed_blocks) {
 		ret = erofs_bh_balloon(bh, blknr_to_addr(compressed_blocks));
 		DBG_BUGON(ret != EROFS_BLKSIZ);
 	} else {
-		DBG_BUGON(!inode->idata_size);
+		if (!cfg.c_fragments)
+			DBG_BUGON(!inode->idata_size);
 	}
 
 	erofs_info("compressed %s (%llu bytes) into %u blocks",
 		   inode->i_srcpath, (unsigned long long)inode->i_size,
 		   compressed_blocks);
 
-	if (inode->idata_size)
+	if (inode->idata_size) {
+		bh->op = &erofs_skip_write_bhops;
 		inode->bh_data = bh;
-	else
+	} else {
 		erofs_bdrop(bh, false);
+	}
 
 	inode->u.i_blocks = compressed_blocks;
 
@@ -731,7 +779,8 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
 		DBG_BUGON(ret);
 	}
 	inode->compressmeta = compressmeta;
-	erofs_droid_blocklist_write(inode, blkaddr, compressed_blocks);
+	if (!erofs_is_packed_inode(inode))
+		erofs_droid_blocklist_write(inode, blkaddr, compressed_blocks);
 	return 0;
 
 err_free_idata:
@@ -741,8 +790,6 @@ err_free_idata:
 	}
 err_bdrop:
 	erofs_bdrop(bh, true);	/* revoke buffer */
-err_close:
-	close(fd);
 err_free_meta:
 	free(compressmeta);
 	return ret;
@@ -856,6 +903,10 @@ int z_erofs_compress_init(struct erofs_buffer_head *sb_bh)
 		}
 		erofs_sb_set_big_pcluster();
 	}
+	if (cfg.c_pclusterblks_packed > cfg.c_pclusterblks_max) {
+		erofs_err("invalid physical cluster size for the packed file");
+		return -EINVAL;
+	}
 
 	if (ret != Z_EROFS_COMPRESSION_LZ4)
 		erofs_sb_set_compr_cfgs();
diff --git a/lib/fragments.c b/lib/fragments.c
new file mode 100644
index 0000000..b8c37d5
--- /dev/null
+++ b/lib/fragments.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C), 2022, Coolpad Group Limited.
+ * Created by Yue Hu <huyue2@coolpad.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <unistd.h>
+#include "erofs/err.h"
+#include "erofs/inode.h"
+#include "erofs/compress.h"
+#include "erofs/print.h"
+#include "erofs/fragments.h"
+
+static FILE *packedfile;
+const char *frags_packedname = "packed_file";
+
+int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
+			   unsigned int len)
+{
+	inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+	inode->fragmentoff = ftell(packedfile);
+	inode->fragment_size = len;
+	/*
+	 * If the packed inode is larger than 4GiB, the full fragmentoff
+	 * will be recorded by switching to the noncompact layout anyway.
+	 */
+	if (inode->fragmentoff >> 32)
+		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+
+	if (fwrite(data, len, 1, packedfile) != 1)
+		return -EIO;
+
+	erofs_sb_set_fragments();
+
+	erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
+		  inode->fragmentoff);
+	return len;
+}
+
+struct erofs_inode *erofs_mkfs_build_fragments(void)
+{
+	fflush(packedfile);
+
+	return erofs_mkfs_build_special_from_fd(fileno(packedfile),
+						frags_packedname);
+}
+
+void erofs_fragments_exit(void)
+{
+	if (packedfile)
+		fclose(packedfile);
+}
+
+int erofs_fragments_init(void)
+{
+#ifdef HAVE_TMPFILE64
+	packedfile = tmpfile64();
+#else
+	packedfile = tmpfile();
+#endif
+	if (!packedfile)
+		return -ENOMEM;
+	return 0;
+}
diff --git a/lib/inode.c b/lib/inode.c
index 4da28b3..130e275 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -25,6 +25,7 @@
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
 #include "erofs/blobchunk.h"
+#include "erofs/fragments.h"
 #include "liberofs_private.h"
 
 #define S_SHIFT                 12
@@ -424,7 +425,11 @@ int erofs_write_file(struct erofs_inode *inode)
 	}
 
 	if (cfg.c_compr_alg_master && erofs_file_is_compressible(inode)) {
-		ret = erofs_write_compressed_file(inode);
+		fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
+		if (fd < 0)
+			return -errno;
+		ret = erofs_write_compressed_file(inode, fd);
+		close(fd);
 
 		if (!ret || ret != -ENOSPC)
 			return ret;
@@ -769,6 +774,8 @@ static bool erofs_should_use_inode_extended(struct erofs_inode *inode)
 		return true;
 	if (inode->i_size > UINT_MAX)
 		return true;
+	if (erofs_is_packed_inode(inode))
+		return false;
 	if (inode->i_uid > USHRT_MAX)
 		return true;
 	if (inode->i_gid > USHRT_MAX)
@@ -844,8 +851,7 @@ static int erofs_droid_inode_fsconfig(struct erofs_inode *inode,
 }
 #endif
 
-static int erofs_fill_inode(struct erofs_inode *inode,
-			    struct stat64 *st,
+static int erofs_fill_inode(struct erofs_inode *inode, struct stat64 *st,
 			    const char *path)
 {
 	int err = erofs_droid_inode_fsconfig(inode, st, path);
@@ -1180,3 +1186,49 @@ struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
 
 	return erofs_mkfs_build_tree(inode);
 }
+
+struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name)
+{
+	struct stat64 st;
+	struct erofs_inode *inode;
+	int ret;
+
+	lseek(fd, 0, SEEK_SET);
+	ret = fstat64(fd, &st);
+	if (ret)
+		return ERR_PTR(-errno);
+
+	inode = erofs_new_inode();
+	if (IS_ERR(inode))
+		return inode;
+
+	if (name == EROFS_PACKED_INODE) {
+		st.st_uid = st.st_gid = 0;
+		st.st_nlink = 0;
+	}
+
+	ret = erofs_fill_inode(inode, &st, name);
+	if (ret) {
+		free(inode);
+		return ERR_PTR(ret);
+	}
+
+	if (name == EROFS_PACKED_INODE) {
+		sbi.packed_nid = EROFS_PACKED_NID_UNALLOCATED;
+		inode->nid = sbi.packed_nid;
+	}
+
+	ret = erofs_write_compressed_file(inode, fd);
+	if (ret == -ENOSPC) {
+		lseek(fd, 0, SEEK_SET);
+		ret = write_uncompressed_file_from_fd(inode, fd);
+	}
+
+	if (ret) {
+		DBG_BUGON(ret == -ENOSPC);
+		return ERR_PTR(ret);
+	}
+	erofs_prepare_inode_buffer(inode);
+	erofs_write_tail_end(inode);
+	return inode;
+}
diff --git a/mkfs/main.c b/mkfs/main.c
index 594ecf9..bbca7b9 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -23,6 +23,7 @@
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
 #include "erofs/blobchunk.h"
+#include "erofs/fragments.h"
 #include "../lib/liberofs_private.h"
 
 #ifdef HAVE_LIBUUID
@@ -133,9 +134,9 @@ static int parse_extended_opts(const char *opts)
 		const char *p = strchr(token, ',');
 
 		next = NULL;
-		if (p)
+		if (p) {
 			next = p + 1;
-		else {
+		} else {
 			p = token + strlen(token);
 			next = p;
 		}
@@ -202,6 +203,23 @@ static int parse_extended_opts(const char *opts)
 				return -EINVAL;
 			cfg.c_ztailpacking = true;
 		}
+
+		if (MATCH_EXTENTED_OPT("fragments", token, keylen)) {
+			char *endptr;
+			u64 i;
+
+			cfg.c_fragments = true;
+			if (vallen) {
+				i = strtoull(value, &endptr, 0);
+				if (endptr - value != vallen ||
+				    i < EROFS_BLKSIZ || i % EROFS_BLKSIZ) {
+					erofs_err("invalid pcluster size for the packed file %s",
+						  next);
+					return -EINVAL;
+				}
+				cfg.c_pclusterblks_packed = i / EROFS_BLKSIZ;
+			}
+		}
 	}
 	return 0;
 }
@@ -458,7 +476,8 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 
 int erofs_mkfs_update_super_block(struct erofs_buffer_head *bh,
 				  erofs_nid_t root_nid,
-				  erofs_blk_t *blocks)
+				  erofs_blk_t *blocks,
+				  erofs_nid_t packed_nid)
 {
 	struct erofs_super_block sb = {
 		.magic     = cpu_to_le32(EROFS_SUPER_MAGIC_V1),
@@ -482,6 +501,7 @@ int erofs_mkfs_update_super_block(struct erofs_buffer_head *bh,
 	*blocks         = erofs_mapbh(NULL);
 	sb.blocks       = cpu_to_le32(*blocks);
 	sb.root_nid     = cpu_to_le16(root_nid);
+	sb.packed_nid    = cpu_to_le64(packed_nid);
 	memcpy(sb.uuid, sbi.uuid, sizeof(sb.uuid));
 
 	if (erofs_sb_has_compr_cfgs())
@@ -599,8 +619,8 @@ int main(int argc, char **argv)
 {
 	int err = 0;
 	struct erofs_buffer_head *sb_bh;
-	struct erofs_inode *root_inode;
-	erofs_nid_t root_nid;
+	struct erofs_inode *root_inode, *packed_inode;
+	erofs_nid_t root_nid, packed_nid;
 	struct stat64 st;
 	erofs_blk_t nblocks;
 	struct timeval t;
@@ -668,6 +688,17 @@ int main(int argc, char **argv)
 	erofs_show_config();
 	if (cfg.c_ztailpacking)
 		erofs_warn("EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
+	if (cfg.c_fragments) {
+		if (!cfg.c_pclusterblks_packed)
+			cfg.c_pclusterblks_packed = cfg.c_pclusterblks_def;
+
+		err = erofs_fragments_init();
+		if (err) {
+			erofs_err("failed to initialize fragments");
+			return 1;
+		}
+		erofs_warn("EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
+	}
 	erofs_set_fs_root(cfg.c_src_path);
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
@@ -737,7 +768,20 @@ int main(int argc, char **argv)
 			goto exit;
 	}
 
-	err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks);
+	packed_nid = 0;
+	if (cfg.c_fragments && erofs_sb_has_fragments()) {
+		erofs_update_progressinfo("Handling packed_file ...");
+		packed_inode = erofs_mkfs_build_fragments();
+		if (IS_ERR(packed_inode)) {
+			err = PTR_ERR(packed_inode);
+			goto exit;
+		}
+		packed_nid = erofs_lookupnid(packed_inode);
+		erofs_iput(packed_inode);
+	}
+
+	err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks,
+					    packed_nid);
 	if (err)
 		goto exit;
 
@@ -759,6 +803,8 @@ exit:
 	erofs_cleanup_exclude_rules();
 	if (cfg.c_chunkbits)
 		erofs_blob_exit();
+	if (cfg.c_fragments)
+		erofs_fragments_exit();
 	erofs_exit_configure();
 
 	if (err) {
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 6/8] erofs-utils: lib: add rb-tree implementation
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
                   ` (4 preceding siblings ...)
  2022-09-26 15:25 ` [PATCH 5/8] erofs-utils: mkfs: support fragments Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  2022-09-26 15:25 ` [PATCH 7/8] erofs-utils: fuse: introduce partial-referenced pclusters Gao Xiang
  2022-09-26 15:25 ` [PATCH 8/8] erofs-utils: mkfs: introduce global compressed data deduplication Gao Xiang
  7 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

From: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>

Introduce a simple rb-tree implementation in order to store the
hash map for deduplication.

Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/Makefile.am |   2 +-
 lib/rb_tree.c   | 512 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/rb_tree.h   | 104 ++++++++++
 3 files changed, 617 insertions(+), 1 deletion(-)
 create mode 100644 lib/rb_tree.c
 create mode 100644 lib/rb_tree.h

diff --git a/lib/Makefile.am b/lib/Makefile.am
index 95f1d55..1a2071c 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -29,7 +29,7 @@ noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
 		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
-		      fragments.c
+		      fragments.c rb_tree.c
 liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/rb_tree.c b/lib/rb_tree.c
new file mode 100644
index 0000000..28800a9
--- /dev/null
+++ b/lib/rb_tree.c
@@ -0,0 +1,512 @@
+// SPDX-License-Identifier: Unlicense
+//
+// Based on Julienne Walker's <http://eternallyconfuzzled.com/> rb_tree
+// implementation.
+//
+// Modified by Mirek Rusin <http://github.com/mirek/rb_tree>.
+//
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to <http://unlicense.org/>
+//
+
+#include "rb_tree.h"
+
+// rb_node
+
+struct rb_node *
+rb_node_alloc () {
+    return malloc(sizeof(struct rb_node));
+}
+
+struct rb_node *
+rb_node_init (struct rb_node *self, void *value) {
+    if (self) {
+        self->red = 1;
+        self->link[0] = self->link[1] = NULL;
+        self->value = value;
+    }
+    return self;
+}
+
+struct rb_node *
+rb_node_create (void *value) {
+    return rb_node_init(rb_node_alloc(), value);
+}
+
+void
+rb_node_dealloc (struct rb_node *self) {
+    if (self) {
+        free(self);
+    }
+}
+
+static int
+rb_node_is_red (const struct rb_node *self) {
+    return self ? self->red : 0;
+}
+
+static struct rb_node *
+rb_node_rotate (struct rb_node *self, int dir) {
+    struct rb_node *result = NULL;
+    if (self) {
+        result = self->link[!dir];
+        self->link[!dir] = result->link[dir];
+        result->link[dir] = self;
+        self->red = 1;
+        result->red = 0;
+    }
+    return result;
+}
+
+static struct rb_node *
+rb_node_rotate2 (struct rb_node *self, int dir) {
+    struct rb_node *result = NULL;
+    if (self) {
+        self->link[!dir] = rb_node_rotate(self->link[!dir], !dir);
+        result = rb_node_rotate(self, dir);
+    }
+    return result;
+}
+
+// rb_tree - default callbacks
+
+int
+rb_tree_node_cmp_ptr_cb (struct rb_tree *self, struct rb_node *a, struct rb_node *b) {
+    return (a->value > b->value) - (a->value < b->value);
+}
+
+void
+rb_tree_node_dealloc_cb (struct rb_tree *self, struct rb_node *node) {
+    if (self) {
+        if (node) {
+            rb_node_dealloc(node);
+        }
+    }
+}
+
+// rb_tree
+
+struct rb_tree *
+rb_tree_alloc () {
+    return malloc(sizeof(struct rb_tree));
+}
+
+struct rb_tree *
+rb_tree_init (struct rb_tree *self, rb_tree_node_cmp_f node_cmp_cb) {
+    if (self) {
+        self->root = NULL;
+        self->size = 0;
+        self->cmp = node_cmp_cb ? node_cmp_cb : rb_tree_node_cmp_ptr_cb;
+    }
+    return self;
+}
+
+struct rb_tree *
+rb_tree_create (rb_tree_node_cmp_f node_cb) {
+    return rb_tree_init(rb_tree_alloc(), node_cb);
+}
+
+void
+rb_tree_dealloc (struct rb_tree *self, rb_tree_node_f node_cb) {
+    if (self) {
+        if (node_cb) {
+            struct rb_node *node = self->root;
+            struct rb_node *save = NULL;
+
+            // Rotate away the left links so that
+            // we can treat this like the destruction
+            // of a linked list
+            while (node) {
+                if (node->link[0] == NULL) {
+
+                    // No left links, just kill the node and move on
+                    save = node->link[1];
+                    node_cb(self, node);
+                    node = NULL;
+                } else {
+
+                    // Rotate away the left link and check again
+                    save = node->link[0];
+                    node->link[0] = save->link[1];
+                    save->link[1] = node;
+                }
+                node = save;
+            }
+        }
+        free(self);
+    }
+}
+
+int
+rb_tree_test (struct rb_tree *self, struct rb_node *root) {
+    int lh, rh;
+
+    if ( root == NULL )
+        return 1;
+    else {
+        struct rb_node *ln = root->link[0];
+        struct rb_node *rn = root->link[1];
+
+        /* Consecutive red links */
+        if (rb_node_is_red(root)) {
+            if (rb_node_is_red(ln) || rb_node_is_red(rn)) {
+                printf("Red violation");
+                return 0;
+            }
+        }
+
+        lh = rb_tree_test(self, ln);
+        rh = rb_tree_test(self, rn);
+
+        /* Invalid binary search tree */
+        if ( ( ln != NULL && self->cmp(self, ln, root) >= 0 )
+            || ( rn != NULL && self->cmp(self, rn, root) <= 0))
+        {
+            puts ( "Binary tree violation" );
+            return 0;
+        }
+
+        /* Black height mismatch */
+        if ( lh != 0 && rh != 0 && lh != rh ) {
+            puts ( "Black violation" );
+            return 0;
+        }
+
+        /* Only count black links */
+        if ( lh != 0 && rh != 0 )
+            return rb_node_is_red ( root ) ? lh : lh + 1;
+        else
+            return 0;
+    }
+}
+
+void *
+rb_tree_find(struct rb_tree *self, void *value) {
+    void *result = NULL;
+    if (self) {
+        struct rb_node node = { .value = value };
+        struct rb_node *it = self->root;
+        int cmp = 0;
+        while (it) {
+            if ((cmp = self->cmp(self, it, &node))) {
+
+                // If the tree supports duplicates, they should be
+                // chained to the right subtree for this to work
+                it = it->link[cmp < 0];
+            } else {
+                break;
+            }
+        }
+        result = it ? it->value : NULL;
+    }
+    return result;
+}
+
+// Creates (malloc'ates)
+int
+rb_tree_insert (struct rb_tree *self, void *value) {
+    return rb_tree_insert_node(self, rb_node_create(value));
+}
+
+// Returns 1 on success, 0 otherwise.
+int
+rb_tree_insert_node (struct rb_tree *self, struct rb_node *node) {
+    if (self && node) {
+        if (self->root == NULL) {
+            self->root = node;
+        } else {
+            struct rb_node head = { 0 }; // False tree root
+            struct rb_node *g, *t;       // Grandparent & parent
+            struct rb_node *p, *q;       // Iterator & parent
+            int dir = 0, last = 0;
+
+            // Set up our helpers
+            t = &head;
+            g = p = NULL;
+            q = t->link[1] = self->root;
+
+            // Search down the tree for a place to insert
+            while (1) {
+                if (q == NULL) {
+
+                    // Insert node at the first null link.
+                    p->link[dir] = q = node;
+                } else if (rb_node_is_red(q->link[0]) && rb_node_is_red(q->link[1])) {
+
+                    // Simple red violation: color flip
+                    q->red = 1;
+                    q->link[0]->red = 0;
+                    q->link[1]->red = 0;
+                }
+
+                if (rb_node_is_red(q) && rb_node_is_red(p)) {
+
+                    // Hard red violation: rotations necessary
+                    int dir2 = t->link[1] == g;
+                    if (q == p->link[last]) {
+                        t->link[dir2] = rb_node_rotate(g, !last);
+                    } else {
+                        t->link[dir2] = rb_node_rotate2(g, !last);
+                    }
+                }
+
+                // Stop working if we inserted a node. This
+                // check also disallows duplicates in the tree
+                if (self->cmp(self, q, node) == 0) {
+                    break;
+                }
+
+                last = dir;
+                dir = self->cmp(self, q, node) < 0;
+
+                // Move the helpers down
+                if (g != NULL) {
+                    t = g;
+                }
+
+                g = p, p = q;
+                q = q->link[dir];
+            }
+
+            // Update the root (it may be different)
+            self->root = head.link[1];
+        }
+
+        // Make the root black for simplified logic
+        self->root->red = 0;
+        ++self->size;
+        return 1;
+    }
+    return 0;
+}
+
+// Returns 1 if the value was removed, 0 otherwise. Optional node callback
+// can be provided to dealloc node and/or user data. Use rb_tree_node_dealloc
+// default callback to deallocate node created by rb_tree_insert(...).
+int
+rb_tree_remove_with_cb (struct rb_tree *self, void *value, rb_tree_node_f node_cb) {
+    if (self->root != NULL) {
+        struct rb_node head = {0}; // False tree root
+        struct rb_node node = { .value = value }; // Value wrapper node
+        struct rb_node *q, *p, *g; // Helpers
+        struct rb_node *f = NULL;  // Found item
+        int dir = 1;
+
+        // Set up our helpers
+        q = &head;
+        g = p = NULL;
+        q->link[1] = self->root;
+
+        // Search and push a red node down
+        // to fix red violations as we go
+        while (q->link[dir] != NULL) {
+            int last = dir;
+
+            // Move the helpers down
+            g = p, p = q;
+            q = q->link[dir];
+            dir = self->cmp(self, q, &node) < 0;
+
+            // Save the node with matching value and keep
+            // going; we'll do removal tasks at the end
+            if (self->cmp(self, q, &node) == 0) {
+                f = q;
+            }
+
+            // Push the red node down with rotations and color flips
+            if (!rb_node_is_red(q) && !rb_node_is_red(q->link[dir])) {
+                if (rb_node_is_red(q->link[!dir])) {
+                    p = p->link[last] = rb_node_rotate(q, dir);
+                } else if (!rb_node_is_red(q->link[!dir])) {
+                    struct rb_node *s = p->link[!last];
+                    if (s) {
+                        if (!rb_node_is_red(s->link[!last]) && !rb_node_is_red(s->link[last])) {
+
+                            // Color flip
+                            p->red = 0;
+                            s->red = 1;
+                            q->red = 1;
+                        } else {
+                            int dir2 = g->link[1] == p;
+                            if (rb_node_is_red(s->link[last])) {
+                                g->link[dir2] = rb_node_rotate2(p, last);
+                            } else if (rb_node_is_red(s->link[!last])) {
+                                g->link[dir2] = rb_node_rotate(p, last);
+                            }
+
+                            // Ensure correct coloring
+                            q->red = g->link[dir2]->red = 1;
+                            g->link[dir2]->link[0]->red = 0;
+                            g->link[dir2]->link[1]->red = 0;
+                        }
+                    }
+                }
+            }
+        }
+
+        // Replace and remove the saved node
+        if (f) {
+            void *tmp = f->value;
+            f->value = q->value;
+            q->value = tmp;
+
+            p->link[p->link[1] == q] = q->link[q->link[0] == NULL];
+
+            if (node_cb) {
+                node_cb(self, q);
+            }
+            q = NULL;
+        }
+
+        // Update the root (it may be different)
+        self->root = head.link[1];
+
+        // Make the root black for simplified logic
+        if (self->root != NULL) {
+            self->root->red = 0;
+        }
+
+        --self->size;
+    }
+    return 1;
+}
+
+int
+rb_tree_remove (struct rb_tree *self, void *value) {
+    int result = 0;
+    if (self) {
+        result = rb_tree_remove_with_cb(self, value, rb_tree_node_dealloc_cb);
+    }
+    return result;
+}
+
+size_t
+rb_tree_size (struct rb_tree *self) {
+    size_t result = 0;
+    if (self) {
+        result = self->size;
+    }
+    return result;
+}
+
+// rb_iter
+
+struct rb_iter *
+rb_iter_alloc () {
+    return malloc(sizeof(struct rb_iter));
+}
+
+struct rb_iter *
+rb_iter_init (struct rb_iter *self) {
+    if (self) {
+        self->tree = NULL;
+        self->node = NULL;
+        self->top = 0;
+    }
+    return self;
+}
+
+struct rb_iter *
+rb_iter_create () {
+    return rb_iter_init(rb_iter_alloc());
+}
+
+void
+rb_iter_dealloc (struct rb_iter *self) {
+    if (self) {
+        free(self);
+    }
+}
+
+// Internal function, init traversal object, dir determines whether
+// to begin traversal at the smallest or largest valued node.
+static void *
+rb_iter_start (struct rb_iter *self, struct rb_tree *tree, int dir) {
+    void *result = NULL;
+    if (self) {
+        self->tree = tree;
+        self->node = tree->root;
+        self->top = 0;
+
+        // Save the path for later selfersal
+        if (self->node != NULL) {
+            while (self->node->link[dir] != NULL) {
+                self->path[self->top++] = self->node;
+                self->node = self->node->link[dir];
+            }
+        }
+
+        result = self->node == NULL ? NULL : self->node->value;
+    }
+    return result;
+}
+
+// Traverse a red black tree in the user-specified direction (0 asc, 1 desc)
+static void *
+rb_iter_move (struct rb_iter *self, int dir) {
+    if (self->node->link[dir] != NULL) {
+
+        // Continue down this branch
+        self->path[self->top++] = self->node;
+        self->node = self->node->link[dir];
+        while ( self->node->link[!dir] != NULL ) {
+            self->path[self->top++] = self->node;
+            self->node = self->node->link[!dir];
+        }
+    } else {
+
+        // Move to the next branch
+        struct rb_node *last = NULL;
+        do {
+            if (self->top == 0) {
+                self->node = NULL;
+                break;
+            }
+            last = self->node;
+            self->node = self->path[--self->top];
+        } while (last == self->node->link[dir]);
+    }
+    return self->node == NULL ? NULL : self->node->value;
+}
+
+void *
+rb_iter_first (struct rb_iter *self, struct rb_tree *tree) {
+    return rb_iter_start(self, tree, 0);
+}
+
+void *
+rb_iter_last (struct rb_iter *self, struct rb_tree *tree) {
+    return rb_iter_start(self, tree, 1);
+}
+
+void *
+rb_iter_next (struct rb_iter *self) {
+    return rb_iter_move(self, 1);
+}
+
+void *
+rb_iter_prev (struct rb_iter *self) {
+    return rb_iter_move(self, 0);
+}
diff --git a/lib/rb_tree.h b/lib/rb_tree.h
new file mode 100644
index 0000000..5b35c74
--- /dev/null
+++ b/lib/rb_tree.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: Unlicense */
+//
+// Based on Julienne Walker's <http://eternallyconfuzzled.com/> rb_tree
+// implementation.
+//
+// Modified by Mirek Rusin <http://github.com/mirek/rb_tree>.
+//
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to <http://unlicense.org/>
+//
+
+#ifndef __RB_TREE_H__
+#define __RB_TREE_H__ 1
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#ifndef RB_ITER_MAX_HEIGHT
+#define RB_ITER_MAX_HEIGHT 64 // Tallest allowable tree to iterate
+#endif
+
+struct rb_node;
+struct rb_tree;
+
+typedef int  (*rb_tree_node_cmp_f) (struct rb_tree *self, struct rb_node *a, struct rb_node *b);
+typedef void (*rb_tree_node_f)     (struct rb_tree *self, struct rb_node *node);
+
+struct rb_node {
+    int             red;     // Color red (1), black (0)
+    struct rb_node *link[2]; // Link left [0] and right [1]
+    void           *value;   // User provided, used indirectly via rb_tree_node_cmp_f.
+};
+
+struct rb_tree {
+    struct rb_node    *root;
+    rb_tree_node_cmp_f cmp;
+    size_t             size;
+    void              *info; // User provided, not used by rb_tree.
+};
+
+struct rb_iter {
+    struct rb_tree *tree;
+    struct rb_node *node;                     // Current node
+    struct rb_node *path[RB_ITER_MAX_HEIGHT]; // Traversal path
+    size_t          top;                      // Top of stack
+    void           *info;                     // User provided, not used by rb_iter.
+};
+
+int             rb_tree_node_cmp_ptr_cb (struct rb_tree *self, struct rb_node *a, struct rb_node *b);
+void            rb_tree_node_dealloc_cb (struct rb_tree *self, struct rb_node *node);
+
+struct rb_node *rb_node_alloc           ();
+struct rb_node *rb_node_create          (void *value);
+struct rb_node *rb_node_init            (struct rb_node *self, void *value);
+void            rb_node_dealloc         (struct rb_node *self);
+
+struct rb_tree *rb_tree_alloc           ();
+struct rb_tree *rb_tree_create          (rb_tree_node_cmp_f cmp);
+struct rb_tree *rb_tree_init            (struct rb_tree *self, rb_tree_node_cmp_f cmp);
+void            rb_tree_dealloc         (struct rb_tree *self, rb_tree_node_f node_cb);
+void           *rb_tree_find            (struct rb_tree *self, void *value);
+int             rb_tree_insert          (struct rb_tree *self, void *value);
+int             rb_tree_remove          (struct rb_tree *self, void *value);
+size_t          rb_tree_size            (struct rb_tree *self);
+
+int             rb_tree_insert_node     (struct rb_tree *self, struct rb_node *node);
+int             rb_tree_remove_with_cb  (struct rb_tree *self, void *value, rb_tree_node_f node_cb);
+
+int             rb_tree_test            (struct rb_tree *self, struct rb_node *root);
+
+struct rb_iter *rb_iter_alloc           ();
+struct rb_iter *rb_iter_init            ();
+struct rb_iter *rb_iter_create          ();
+void            rb_iter_dealloc         (struct rb_iter *self);
+void           *rb_iter_first           (struct rb_iter *self, struct rb_tree *tree);
+void           *rb_iter_last            (struct rb_iter *self, struct rb_tree *tree);
+void           *rb_iter_next            (struct rb_iter *self);
+void           *rb_iter_prev            (struct rb_iter *self);
+
+#endif
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 7/8] erofs-utils: fuse: introduce partial-referenced pclusters
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
                   ` (5 preceding siblings ...)
  2022-09-26 15:25 ` [PATCH 6/8] erofs-utils: lib: add rb-tree implementation Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  2022-09-26 15:25 ` [PATCH 8/8] erofs-utils: mkfs: introduce global compressed data deduplication Gao Xiang
  7 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

Due to deduplication for compressed data, pclusters can be partially
referenced with their prefixes.

Decompression algorithms should know that in advance, otherwise they
will fail out unexpectedly.

Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/internal.h | 4 ++++
 include/erofs_fs.h       | 7 ++++++-
 lib/data.c               | 3 ++-
 lib/zmap.c               | 6 ++++++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 6fc58f9..3bce92f 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -136,6 +136,7 @@ EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
 EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
 EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
 EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
+EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
 
 #define EROFS_I_EA_INITED	(1 << 0)
@@ -286,6 +287,7 @@ enum {
 	BH_Encoded,
 	BH_FullMapped,
 	BH_Fragment,
+	BH_Partialref,
 };
 
 /* Has a disk mapping */
@@ -298,6 +300,8 @@ enum {
 #define EROFS_MAP_FULL_MAPPED	(1 << BH_FullMapped)
 /* Located in the special packed inode */
 #define EROFS_MAP_FRAGMENT	(1 << BH_Fragment)
+/* the extent refers to partial compressed data */
+#define EROFS_MAP_PARTIAL_REF	(1 << BH_Partialref)
 
 struct erofs_map_blocks {
 	char mpage[EROFS_BLKSIZ];
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index e492ad9..48ad5b5 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -26,6 +26,7 @@
 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE	0x00000008
 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING	0x00000010
 #define EROFS_FEATURE_INCOMPAT_FRAGMENTS	0x00000020
+#define EROFS_FEATURE_INCOMPAT_DEDUPE		0x00000020
 #define EROFS_ALL_FEATURE_INCOMPAT		\
 	(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
@@ -33,7 +34,8 @@
 	 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
 	 EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
 	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
-	 EROFS_FEATURE_INCOMPAT_FRAGMENTS)
+	 EROFS_FEATURE_INCOMPAT_FRAGMENTS | \
+	 EROFS_FEATURE_INCOMPAT_DEDUPE)
 
 #define EROFS_SB_EXTSLOT_SIZE	16
 
@@ -371,6 +373,9 @@ enum {
 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2
 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT         0
 
+/* (noncompact, HEAD) This pcluster refers to compressed data partially */
+#define Z_EROFS_VLE_DI_PARTIAL_REF		(1 << 15)
+
 /*
  * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
  * compressed block count of a compressed extent (in logical clusters, aka.
diff --git a/lib/data.c b/lib/data.c
index bcb0f7e..76a6677 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -258,7 +258,8 @@ static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
 		} else {
 			DBG_BUGON(end != map.m_la + map.m_llen);
 			length = map.m_llen;
-			partial = !(map.m_flags & EROFS_MAP_FULL_MAPPED);
+			partial = !(map.m_flags & EROFS_MAP_FULL_MAPPED) ||
+				(map.m_flags & EROFS_MAP_PARTIAL_REF);
 		}
 
 		if (map.m_la < offset) {
diff --git a/lib/zmap.c b/lib/zmap.c
index 2c2ba01..11228af 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -121,6 +121,7 @@ struct z_erofs_maprecorder {
 	u16 delta[2];
 	erofs_blk_t pblk, compressedlcs;
 	erofs_off_t nextpackoff;
+	bool partialref;
 };
 
 static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
@@ -183,6 +184,9 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 		break;
 	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
 	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		if (advise & Z_EROFS_VLE_DI_PARTIAL_REF)
+			m->partialref = true;
+
 		m->clusterofs = le16_to_cpu(di->di_clusterofs);
 		m->pblk = le32_to_cpu(di->di_u.blkaddr);
 		break;
@@ -625,6 +629,8 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi,
 		goto out;
 	}
 
+	if (m.partialref)
+		map->m_flags |= EROFS_MAP_PARTIAL_REF;
 	map->m_llen = end - map->m_la;
 	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
 		vi->z_tailextent_headlcn = m.lcn;
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 8/8] erofs-utils: mkfs: introduce global compressed data deduplication
  2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
                   ` (6 preceding siblings ...)
  2022-09-26 15:25 ` [PATCH 7/8] erofs-utils: fuse: introduce partial-referenced pclusters Gao Xiang
@ 2022-09-26 15:25 ` Gao Xiang
  7 siblings, 0 replies; 10+ messages in thread
From: Gao Xiang @ 2022-09-26 15:25 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Yue Hu, Ziyang Zhang

From: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>

This patch introduces global compressed data deduplication to
reuse potential prefixes for each pcluster.

It also uses rolling hashing and tries to shorten the previous
compressed extent in order to explore more possibilities for
data deduplication.

Co-developped-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Ziyang Zhang <ZiyangZhang@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/config.h |   1 +
 include/erofs/dedupe.h |  39 +++++++++
 lib/Makefile.am        |   3 +-
 lib/compress.c         | 123 +++++++++++++++++++++++-----
 lib/dedupe.c           | 176 +++++++++++++++++++++++++++++++++++++++++
 lib/rolling_hash.h     |  60 ++++++++++++++
 mkfs/main.c            |  19 +++++
 7 files changed, 399 insertions(+), 22 deletions(-)
 create mode 100644 include/erofs/dedupe.h
 create mode 100644 lib/dedupe.c
 create mode 100644 lib/rolling_hash.h

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 764b0f7..a93ab25 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -45,6 +45,7 @@ struct erofs_configure {
 	bool c_noinline_data;
 	bool c_ztailpacking;
 	bool c_fragments;
+	bool c_dedupe;
 	bool c_ignore_mtime;
 	bool c_showprogress;
 
diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
new file mode 100644
index 0000000..153bd4c
--- /dev/null
+++ b/include/erofs/dedupe.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#ifndef __EROFS_DEDUPE_H
+#define __EROFS_DEDUPE_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "internal.h"
+
+struct z_erofs_inmem_extent {
+	erofs_blk_t blkaddr;
+	unsigned int compressedblks;
+	unsigned int length;
+	bool raw, partial;
+};
+
+struct z_erofs_dedupe_ctx {
+	u8		*start, *end;
+	u8		*cur;
+	struct z_erofs_inmem_extent	e;
+};
+
+int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx);
+int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
+			  void *original_data);
+void z_erofs_dedupe_commit(bool drop);
+int z_erofs_dedupe_init(unsigned int wsiz);
+void z_erofs_dedupe_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 1a2071c..faa7311 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -29,7 +29,8 @@ noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
 		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
-		      fragments.c rb_tree.c
+		      fragments.c rb_tree.c dedupe.c
+
 liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/compress.c b/lib/compress.c
index c0bd307..17b3213 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -15,6 +15,7 @@
 #include "erofs/io.h"
 #include "erofs/cache.h"
 #include "erofs/compress.h"
+#include "erofs/dedupe.h"
 #include "compressor.h"
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
@@ -23,13 +24,6 @@
 static struct erofs_compress compresshandle;
 static unsigned int algorithmtype[2];
 
-struct z_erofs_inmem_extent {
-	erofs_blk_t blkaddr;
-	unsigned int compressedblks;
-	unsigned int length;
-	bool raw;
-};
-
 struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
 	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
@@ -75,9 +69,12 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 	unsigned int count = ctx->e.length;
 	unsigned int d0 = 0, d1 = (clusterofs + count) / EROFS_BLKSIZ;
 	struct z_erofs_vle_decompressed_index di;
-	unsigned int type;
-	__le16 advise;
+	unsigned int type, advise;
+
+	if (!count)
+		return;
 
+	ctx->e.length = 0;	/* mark as written first */
 	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
 
 	/* whether the tail-end (un)compressed block or not */
@@ -87,11 +84,12 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		 * is well-compressed for !ztailpacking cases.
 		 */
 		DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking);
+		DBG_BUGON(ctx->e.partial);
 		type = ctx->e.raw ? Z_EROFS_VLE_CLUSTER_TYPE_PLAIN :
 			Z_EROFS_VLE_CLUSTER_TYPE_HEAD;
-		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
+		advise = type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT;
+		di.di_advise = cpu_to_le16(advise);
 
-		di.di_advise = advise;
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY &&
 		    !ctx->e.compressedblks)
 			di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
@@ -106,6 +104,7 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 	}
 
 	do {
+		advise = 0;
 		/* XXX: big pcluster feature should be per-inode */
 		if (d0 == 1 && erofs_sb_has_big_pcluster()) {
 			type = Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD;
@@ -141,9 +140,14 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 				di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
 			else
 				di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+
+			if (ctx->e.partial) {
+				DBG_BUGON(ctx->e.raw);
+				advise |= Z_EROFS_VLE_DI_PARTIAL_REF;
+			}
 		}
-		advise = cpu_to_le16(type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT);
-		di.di_advise = advise;
+		advise |= type << Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT;
+		di.di_advise = cpu_to_le16(advise);
 
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
@@ -158,6 +162,70 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 	ctx->clusterofs = clusterofs + count;
 }
 
+static int z_erofs_compress_dedupe(struct erofs_inode *inode,
+				   struct z_erofs_vle_compress_ctx *ctx,
+				   unsigned int *len)
+{
+	int ret = 0;
+
+	do {
+		struct z_erofs_dedupe_ctx dctx = {
+			.start = ctx->queue + ctx->head - ({ int rc;
+				if (ctx->e.length <= EROFS_BLKSIZ)
+					rc = 0;
+				else if (ctx->e.length - EROFS_BLKSIZ >= ctx->head)
+					rc = ctx->head;
+				else
+					rc = ctx->e.length - EROFS_BLKSIZ;
+				rc; }),
+			.end = ctx->queue + ctx->head + *len,
+			.cur = ctx->queue + ctx->head,
+		};
+		int delta;
+
+		if (z_erofs_dedupe_match(&dctx))
+			break;
+
+		/* fall back to noncompact indexes for deduplication */
+		inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B;
+		inode->datalayout = EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+		erofs_sb_set_dedupe();
+
+		delta = ctx->queue + ctx->head - dctx.cur;
+		if (delta) {
+			DBG_BUGON(delta < 0);
+			DBG_BUGON(!ctx->e.length);
+			ctx->e.partial = true;
+			ctx->e.length -= delta;
+		}
+
+		erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks",
+			  dctx.e.length, dctx.e.raw ? "un" : "",
+			  delta, dctx.e.blkaddr, dctx.e.compressedblks);
+		z_erofs_write_indexes(ctx);
+		ctx->e = dctx.e;
+		ctx->head += dctx.e.length - delta;
+		DBG_BUGON(*len < dctx.e.length - delta);
+		*len -= dctx.e.length - delta;
+
+		if (ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
+			const unsigned int qh_aligned =
+				round_down(ctx->head, EROFS_BLKSIZ);
+			const unsigned int qh_after = ctx->head - qh_aligned;
+
+			memmove(ctx->queue, ctx->queue + qh_aligned,
+				*len + qh_after);
+			ctx->head = qh_after;
+			ctx->tail = qh_after + *len;
+			ret = -EAGAIN;
+			break;
+		}
+	} while (*len);
+
+	z_erofs_write_indexes(ctx);
+	return ret;
+}
+
 static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 				     unsigned int *len, char *dst)
 {
@@ -268,8 +336,11 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx,
 		bool may_packing = (cfg.c_fragments && final &&
 				   !erofs_is_packed_inode(inode));
 
+		if (z_erofs_compress_dedupe(inode, ctx, &len) && !final)
+			break;
+
 		if (len <= pclustersize) {
-			if (!final)
+			if (!final || !len)
 				break;
 			if (may_packing) {
 				ctx->e.length = len;
@@ -290,13 +361,17 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx,
 					  inode->i_srcpath,
 					  erofs_strerror(ret));
 			}
-			if (may_inline && len < EROFS_BLKSIZ)
+
+			if (may_inline && len < EROFS_BLKSIZ) {
 				ret = z_erofs_fill_inline_data(inode,
 						ctx->queue + ctx->head,
 						len, true);
-			else
+			} else {
+				may_inline = false;
+				may_packing = false;
 nocompression:
 				ret = write_uncompressed_extent(ctx, &len, dst);
+			}
 
 			if (ret < 0)
 				return ret;
@@ -367,11 +442,14 @@ frag_packing:
 			if (ret)
 				return ret;
 			ctx->e.raw = false;
+			may_inline = false;
+			may_packing = false;
 		}
-		/* write indexes for this pcluster */
+		ctx->e.partial = false;
 		ctx->e.blkaddr = ctx->blkaddr;
-		z_erofs_write_indexes(ctx);
-
+		if (!may_inline && !may_packing)
+			(void)z_erofs_dedupe_insert(&ctx->e,
+						    ctx->queue + ctx->head);
 		ctx->blkaddr += ctx->e.compressedblks;
 		ctx->head += ctx->e.length;
 		len -= ctx->e.length;
@@ -688,7 +766,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 		if (inode->datalayout == EROFS_INODE_FLAT_COMPRESSION)
 			inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2;
 	}
-	if (cfg.c_fragments)
+	if (cfg.c_fragments && !cfg.c_dedupe)
 		inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
 	inode->z_algorithmtype[0] = algorithmtype[0];
 	inode->z_algorithmtype[1] = algorithmtype[1];
@@ -729,15 +807,18 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 	DBG_BUGON(compressed_blocks < !!inode->idata_size);
 	compressed_blocks -= !!inode->idata_size;
 
+	z_erofs_write_indexes(&ctx);
 	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
 	if (!inode->fragment_size &&
 	    compressed_blocks * EROFS_BLKSIZ + inode->idata_size +
 	    legacymetasize >= inode->i_size) {
+		z_erofs_dedupe_commit(true);
 		ret = -ENOSPC;
 		goto err_free_idata;
 	}
+	z_erofs_dedupe_commit(false);
 	z_erofs_write_mapheader(inode, compressmeta);
 
 	/* if the entire file is a fragment, a simplified form is used. */
@@ -753,7 +834,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 		ret = erofs_bh_balloon(bh, blknr_to_addr(compressed_blocks));
 		DBG_BUGON(ret != EROFS_BLKSIZ);
 	} else {
-		if (!cfg.c_fragments)
+		if (!cfg.c_fragments && !cfg.c_dedupe)
 			DBG_BUGON(!inode->idata_size);
 	}
 
diff --git a/lib/dedupe.c b/lib/dedupe.c
new file mode 100644
index 0000000..c382303
--- /dev/null
+++ b/lib/dedupe.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#include "erofs/dedupe.h"
+#include "erofs/print.h"
+#include "rb_tree.h"
+#include "rolling_hash.h"
+
+void erofs_sha256(const unsigned char *in, unsigned long in_size,
+		  unsigned char out[32]);
+
+static unsigned int window_size, rollinghash_rm;
+static struct rb_tree *dedupe_tree, *dedupe_subtree;
+
+struct z_erofs_dedupe_item {
+	long long	hash;
+	u8		prefix_sha256[32];
+
+	erofs_blk_t	compressed_blkaddr;
+	unsigned int	compressed_blks;
+
+	int		original_length;
+	bool		partial, raw;
+	u8		extra_data[];
+};
+
+static int z_erofs_dedupe_rbtree_cmp(struct rb_tree *self,
+		struct rb_node *node_a, struct rb_node *node_b)
+{
+	struct z_erofs_dedupe_item *e_a = node_a->value;
+	struct z_erofs_dedupe_item *e_b = node_b->value;
+
+	return (e_a->hash > e_b->hash) - (e_a->hash < e_b->hash);
+}
+
+int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
+{
+	struct z_erofs_dedupe_item e_find;
+	u8 *cur;
+	bool initial = true;
+
+	if (!dedupe_tree)
+		return -ENOENT;
+
+	if (ctx->cur > ctx->end - window_size)
+		cur = ctx->end - window_size;
+	else
+		cur = ctx->cur;
+
+	/* move backward byte-by-byte */
+	for (; cur >= ctx->start; --cur) {
+		struct z_erofs_dedupe_item *e;
+		unsigned int extra;
+		u8 sha256[32];
+
+		if (initial) {
+			/* initial try */
+			e_find.hash = erofs_rolling_hash_init(cur, window_size, true);
+			initial = false;
+		} else {
+			e_find.hash = erofs_rolling_hash_advance(e_find.hash,
+				rollinghash_rm, cur[window_size], cur[0]);
+		}
+
+		e = rb_tree_find(dedupe_tree, &e_find);
+		if (!e) {
+			e = rb_tree_find(dedupe_subtree, &e_find);
+			if (!e)
+				continue;
+		}
+
+		erofs_sha256(cur, window_size, sha256);
+		if (memcmp(sha256, e->prefix_sha256, sizeof(sha256)))
+			continue;
+
+		extra = 0;
+		while (cur + window_size + extra < ctx->end &&
+		       window_size + extra < e->original_length &&
+		       e->extra_data[extra] == cur[window_size + extra])
+			++extra;
+
+		if (window_size + extra <= ctx->cur - cur)
+			continue;
+		ctx->cur = cur;
+		ctx->e.length = window_size + extra;
+		ctx->e.partial = e->partial ||
+			(window_size + extra < e->original_length);
+		ctx->e.raw = e->raw;
+		ctx->e.blkaddr = e->compressed_blkaddr;
+		ctx->e.compressedblks = e->compressed_blks;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
+			  void *original_data)
+{
+	struct z_erofs_dedupe_item *di;
+
+	if (e->length < window_size)
+		return 0;
+
+	di = malloc(sizeof(*di) + e->length - window_size);
+	if (!di)
+		return -ENOMEM;
+
+	di->original_length = e->length;
+	erofs_sha256(original_data, window_size, di->prefix_sha256);
+	di->hash = erofs_rolling_hash_init(original_data,
+			window_size, true);
+	memcpy(di->extra_data, original_data + window_size,
+	       e->length - window_size);
+	di->compressed_blkaddr = e->blkaddr;
+	di->compressed_blks = e->compressedblks;
+	di->partial = e->partial;
+	di->raw = e->raw;
+
+	/* with the same rolling hash */
+	if (!rb_tree_insert(dedupe_subtree, di))
+		free(di);
+	return 0;
+}
+
+static void z_erofs_dedupe_node_free_cb(struct rb_tree *self,
+					struct rb_node *node)
+{
+	free(node->value);
+	rb_tree_node_dealloc_cb(self, node);
+}
+
+void z_erofs_dedupe_commit(bool drop)
+{
+	if (!dedupe_subtree)
+		return;
+	if (!drop) {
+		struct rb_iter iter;
+		struct z_erofs_dedupe_item *di;
+
+		di = rb_iter_first(&iter, dedupe_subtree);
+		while (di) {
+			if (!rb_tree_insert(dedupe_tree, di))
+				DBG_BUGON(1);
+			di = rb_iter_next(&iter);
+		}
+		/*rb_iter_dealloc(iter);*/
+		rb_tree_dealloc(dedupe_subtree, rb_tree_node_dealloc_cb);
+	} else {
+		rb_tree_dealloc(dedupe_subtree, z_erofs_dedupe_node_free_cb);
+	}
+	dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+}
+
+int z_erofs_dedupe_init(unsigned int wsiz)
+{
+	dedupe_tree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+	if (!dedupe_tree)
+		return -ENOMEM;
+
+	dedupe_subtree = rb_tree_create(z_erofs_dedupe_rbtree_cmp);
+	if (!dedupe_subtree) {
+		rb_tree_dealloc(dedupe_subtree, NULL);
+		return -ENOMEM;
+	}
+	window_size = wsiz;
+	rollinghash_rm = erofs_rollinghash_calc_rm(window_size);
+	return 0;
+}
+
+void z_erofs_dedupe_exit(void)
+{
+	z_erofs_dedupe_commit(true);
+	rb_tree_dealloc(dedupe_subtree, NULL);
+	rb_tree_dealloc(dedupe_tree, z_erofs_dedupe_node_free_cb);
+}
diff --git a/lib/rolling_hash.h b/lib/rolling_hash.h
new file mode 100644
index 0000000..448db34
--- /dev/null
+++ b/lib/rolling_hash.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2022 Alibaba Cloud
+ */
+#ifndef __ROLLING_HASH_H__
+#define __ROLLING_HASH_H__
+
+#include <erofs/defs.h>
+
+#define PRIME_NUMBER	4294967295LL
+#define RADIX		256
+
+static inline long long erofs_rolling_hash_init(u8 *input,
+						int len, bool backwards)
+{
+	long long hash = 0;
+
+	if (!backwards) {
+		int i;
+
+		for (i = 0; i < len; ++i)
+			hash = (RADIX * hash + input[i]) % PRIME_NUMBER;
+	} else {
+		while (len)
+			hash = (RADIX * hash + input[--len]) % PRIME_NUMBER;
+	}
+	return hash;
+}
+
+/* RM = R ^ (M-1) % Q */
+/*
+ * NOTE: value of "hash" could be negative so we cannot use unsiged types for "hash"
+ * "long long" is used here and PRIME_NUMBER can be ULONG_MAX
+ */
+static inline long long erofs_rolling_hash_advance(long long old_hash,
+						   unsigned long long RM,
+						   u8 to_remove, u8 to_add)
+{
+	long long hash = old_hash;
+	long long to_remove_val = (to_remove * RM) % PRIME_NUMBER;
+
+	hash = RADIX * (old_hash - to_remove_val) % PRIME_NUMBER;
+	hash = (hash + to_add) % PRIME_NUMBER;
+
+	/* We might get negative value of hash, converting it to positive */
+	if (hash < 0)
+		hash += PRIME_NUMBER;
+	return hash;
+}
+
+static inline long long erofs_rollinghash_calc_rm(int window_size)
+{
+	int i;
+	long long RM = 1;
+
+	for (i = 0; i < window_size - 1; ++i)
+		RM = (RM * RADIX) % PRIME_NUMBER;
+	return RM;
+}
+#endif
diff --git a/mkfs/main.c b/mkfs/main.c
index bbca7b9..8b97796 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -18,6 +18,7 @@
 #include "erofs/inode.h"
 #include "erofs/io.h"
 #include "erofs/compress.h"
+#include "erofs/dedupe.h"
 #include "erofs/xattr.h"
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
@@ -220,6 +221,12 @@ static int parse_extended_opts(const char *opts)
 				cfg.c_pclusterblks_packed = i / EROFS_BLKSIZ;
 			}
 		}
+
+		if (MATCH_EXTENTED_OPT("dedupe", token, keylen)) {
+			if (vallen)
+				return -EINVAL;
+			cfg.c_dedupe = true;
+		}
 	}
 	return 0;
 }
@@ -699,6 +706,8 @@ int main(int argc, char **argv)
 		}
 		erofs_warn("EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
 	}
+	if (cfg.c_dedupe)
+		erofs_warn("EXPERIMENTAL data deduplication feature in use. Use at your own risk!");
 	erofs_set_fs_root(cfg.c_src_path);
 #ifndef NDEBUG
 	if (cfg.c_random_pclusterblks)
@@ -725,6 +734,15 @@ int main(int argc, char **argv)
 		goto exit;
 	}
 
+	if (cfg.c_dedupe) {
+		err = z_erofs_dedupe_init(EROFS_BLKSIZ);
+		if (err) {
+			erofs_err("failed to initialize deduplication: %s",
+				  erofs_strerror(err));
+			goto exit;
+		}
+	}
+
 	err = z_erofs_compress_init(sb_bh);
 	if (err) {
 		erofs_err("failed to initialize compressor: %s",
@@ -795,6 +813,7 @@ int main(int argc, char **argv)
 		err = erofs_mkfs_superblock_csum_set();
 exit:
 	z_erofs_compress_exit();
+	z_erofs_dedupe_exit();
 #ifdef WITH_ANDROID
 	erofs_droid_blocklist_fclose();
 #endif
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH] erofs-utils: lib: support fragments data decompression
  2022-09-26 15:25 ` [PATCH 2/8] erofs-utils: lib: support fragments data decompression Gao Xiang
@ 2022-09-30 13:08   ` Yue Hu
  0 siblings, 0 replies; 10+ messages in thread
From: Yue Hu @ 2022-09-30 13:08 UTC (permalink / raw)
  To: hsiangkao; +Cc: huyue2, linux-erofs, ZiyangZhang

From: Yue Hu <huyue2@coolpad.com>

Add compressed fragments support for erofsfuse.

Signed-off-by: Yue Hu <huyue2@coolpad.com>
---
changes: fix an issue with fragments=4K

 include/erofs/internal.h |  6 ++++++
 include/erofs_fs.h       | 26 +++++++++++++++++------
 lib/data.c               | 19 +++++++++++++++++
 lib/super.c              |  1 +
 lib/zmap.c               | 45 +++++++++++++++++++++++++++++++++++++---
 5 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index a4a1fe3..dd3776c 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -102,6 +102,7 @@ struct erofs_sb_info {
 		u16 devt_slotoff;		/* used for mkfs */
 		u16 device_id_mask;		/* used for others */
 	};
+	erofs_nid_t packed_nid;
 };
 
 /* global sbi */
@@ -132,6 +133,7 @@ EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
 EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
 EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
 EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
+EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
 
 #define EROFS_I_EA_INITED	(1 << 0)
@@ -209,6 +211,7 @@ struct erofs_inode {
 #ifdef WITH_ANDROID
 	uint64_t capabilities;
 #endif
+	erofs_off_t fragmentoff;
 };
 
 static inline bool is_inode_layout_compression(struct erofs_inode *inode)
@@ -279,6 +282,7 @@ enum {
 	BH_Mapped,
 	BH_Encoded,
 	BH_FullMapped,
+	BH_Fragment,
 };
 
 /* Has a disk mapping */
@@ -289,6 +293,8 @@ enum {
 #define EROFS_MAP_ENCODED	(1 << BH_Encoded)
 /* The length of extent is full */
 #define EROFS_MAP_FULL_MAPPED	(1 << BH_FullMapped)
+/* Located in the special packed inode */
+#define EROFS_MAP_FRAGMENT	(1 << BH_Fragment)
 
 struct erofs_map_blocks {
 	char mpage[EROFS_BLKSIZ];
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index b8a7421..e492ad9 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -25,13 +25,15 @@
 #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE	0x00000004
 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE	0x00000008
 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING	0x00000010
+#define EROFS_FEATURE_INCOMPAT_FRAGMENTS	0x00000020
 #define EROFS_ALL_FEATURE_INCOMPAT		\
 	(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
 	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
 	 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
 	 EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
-	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
+	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
+	 EROFS_FEATURE_INCOMPAT_FRAGMENTS)
 
 #define EROFS_SB_EXTSLOT_SIZE	16
 
@@ -73,7 +75,9 @@ struct erofs_super_block {
 	} __packed u1;
 	__le16 extra_devices;	/* # of devices besides the primary device */
 	__le16 devt_slotoff;	/* startoff = devt_slotoff * devt_slotsize */
-	__u8 reserved2[38];
+	__u8 reserved[6];
+	__le64 packed_nid;	/* nid of the special packed inode */
+	__u8 reserved2[24];
 };
 
 /*
@@ -295,17 +299,26 @@ struct z_erofs_lzma_cfgs {
  * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
  * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
  * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
+ * bit 5 : fragment pcluster (0 - off; 1 - on)
  */
 #define Z_EROFS_ADVISE_COMPACTED_2B		0x0001
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1		0x0002
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2		0x0004
 #define Z_EROFS_ADVISE_INLINE_PCLUSTER		0x0008
 #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER	0x0010
+#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER	0x0020
 
+#define Z_EROFS_FRAGMENT_INODE_BIT		7
 struct z_erofs_map_header {
-	__le16	h_reserved1;
-	/* record the size of tailpacking data */
-	__le16  h_idata_size;
+	union {
+		/* direct addressing for fragment offset */
+		__le32	h_fragmentoff;
+		struct {
+			__le16  h_reserved1;
+			/* record the size of tailpacking data */
+			__le16	h_idata_size;
+		};
+	};
 	__le16	h_advise;
 	/*
 	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
@@ -314,7 +327,8 @@ struct z_erofs_map_header {
 	__u8	h_algorithmtype;
 	/*
 	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
-	 * bit 3-7 : reserved.
+	 * bit 3-6 : reserved;
+	 * bit 7   : move the whole file into packed inode or not.
 	 */
 	__u8	h_clusterbits;
 };
diff --git a/lib/data.c b/lib/data.c
index af52fde..bcb0f7e 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -275,6 +275,25 @@ static int z_erofs_read_data(struct erofs_inode *inode, char *buffer,
 			continue;
 		}
 
+		if (map.m_flags & EROFS_MAP_FRAGMENT) {
+			struct erofs_inode packed_inode = {
+				.nid = sbi.packed_nid,
+			};
+
+			ret = erofs_read_inode_from_disk(&packed_inode);
+			if (ret) {
+				erofs_err("failed to read packed inode from disk");
+				return ret;
+			}
+
+			ret = z_erofs_read_data(&packed_inode,
+					buffer + end - offset, length - skip,
+					inode->fragmentoff + skip);
+			if (ret < 0)
+				break;
+			continue;
+		}
+
 		if (map.m_plen > bufsize) {
 			bufsize = map.m_plen;
 			raw = realloc(raw, bufsize);
diff --git a/lib/super.c b/lib/super.c
index b267412..30aeb36 100644
--- a/lib/super.c
+++ b/lib/super.c
@@ -104,6 +104,7 @@ int erofs_read_superblock(void)
 	sbi.xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
 	sbi.islotbits = EROFS_ISLOTBITS;
 	sbi.root_nid = le16_to_cpu(dsb->root_nid);
+	sbi.packed_nid = le64_to_cpu(dsb->packed_nid);
 	sbi.inos = le64_to_cpu(dsb->inos);
 	sbi.checksum = le32_to_cpu(dsb->checksum);
 
diff --git a/lib/zmap.c b/lib/zmap.c
index 806d608..1ba7c2f 100644
--- a/lib/zmap.c
+++ b/lib/zmap.c
@@ -17,7 +17,7 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi,
 int z_erofs_fill_inode(struct erofs_inode *vi)
 {
 	if (!erofs_sb_has_big_pcluster() &&
-	    !erofs_sb_has_ztailpacking() &&
+	    !erofs_sb_has_ztailpacking() && !erofs_sb_has_fragments() &&
 	    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
 		vi->z_advise = 0;
 		vi->z_algorithmtype[0] = 0;
@@ -40,7 +40,7 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi)
 		return 0;
 
 	DBG_BUGON(!erofs_sb_has_big_pcluster() &&
-		  !erofs_sb_has_ztailpacking() &&
+		  !erofs_sb_has_ztailpacking() && !erofs_sb_has_fragments() &&
 		  vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
 	pos = round_up(iloc(vi->nid) + vi->inode_isize + vi->xattr_isize, 8);
 
@@ -49,6 +49,17 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi)
 		return -EIO;
 
 	h = (struct z_erofs_map_header *)buf;
+	/*
+	 * if the highest bit of the 8-byte map header is set, the whole file
+	 * is stored in the packed inode. The rest bits keeps z_fragmentoff.
+	 */
+	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
+		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+		vi->fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
+		vi->z_tailextent_headlcn = 0;
+		goto out;
+	}
+
 	vi->z_advise = le16_to_cpu(h->h_advise);
 	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
 	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
@@ -83,6 +94,17 @@ static int z_erofs_fill_inode_lazy(struct erofs_inode *vi)
 		if (ret < 0)
 			return ret;
 	}
+	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
+	    !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
+		struct erofs_map_blocks map = { .index = UINT_MAX };
+
+		vi->fragmentoff = le32_to_cpu(h->h_fragmentoff);
+		ret = z_erofs_do_map_blocks(vi, &map,
+					    EROFS_GET_BLOCKS_FINDTAIL);
+		if (ret < 0)
+			return ret;
+	}
+out:
 	vi->flags |= EROFS_I_Z_INITED;
 	return 0;
 }
@@ -546,6 +568,7 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi,
 				 int flags)
 {
 	bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
+	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
 	struct z_erofs_maprecorder m = {
 		.inode = vi,
 		.map = map,
@@ -603,12 +626,19 @@ static int z_erofs_do_map_blocks(struct erofs_inode *vi,
 	}
 
 	map->m_llen = end - map->m_la;
-	if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
 		vi->z_tailextent_headlcn = m.lcn;
+		/* for non-compact indexes, fragmentoff is 64 bits */
+		if (fragment &&
+		    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+			vi->fragmentoff |= (u64)m.pblk << 32;
+	}
 	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
 		map->m_flags |= EROFS_MAP_META;
 		map->m_pa = vi->z_idataoff;
 		map->m_plen = vi->z_idata_size;
+	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
+		map->m_flags |= EROFS_MAP_FRAGMENT;
 	} else {
 		map->m_pa = blknr_to_addr(m.pblk);
 		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -658,6 +688,15 @@ int z_erofs_map_blocks_iter(struct erofs_inode *vi,
 	if (err)
 		goto out;
 
+	if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
+	    !vi->z_tailextent_headlcn) {
+		map->m_la = 0;
+		map->m_llen = vi->i_size;
+		map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
+				EROFS_MAP_FRAGMENT;
+		goto out;
+	}
+
 	err = z_erofs_do_map_blocks(vi, map, flags);
 out:
 	DBG_BUGON(err < 0 && err != -ENOMEM);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-09-30 13:24 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-26 15:25 [PATCH 0/8] erofs-utils: collection for fragments and dedupe Gao Xiang
2022-09-26 15:25 ` [PATCH 1/8] erofs-utils: fuse: support interlaced uncompressed pcluster Gao Xiang
2022-09-26 15:25 ` [PATCH 2/8] erofs-utils: lib: support fragments data decompression Gao Xiang
2022-09-30 13:08   ` [PATCH] " Yue Hu
2022-09-26 15:25 ` [PATCH 3/8] erofs-utils: introduce z_erofs_inmem_extent Gao Xiang
2022-09-26 15:25 ` [PATCH 4/8] erofs-utils: mkfs: support interlaced uncompressed data layout Gao Xiang
2022-09-26 15:25 ` [PATCH 5/8] erofs-utils: mkfs: support fragments Gao Xiang
2022-09-26 15:25 ` [PATCH 6/8] erofs-utils: lib: add rb-tree implementation Gao Xiang
2022-09-26 15:25 ` [PATCH 7/8] erofs-utils: fuse: introduce partial-referenced pclusters Gao Xiang
2022-09-26 15:25 ` [PATCH 8/8] erofs-utils: mkfs: introduce global compressed data deduplication Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).