linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/5] erofs-utils: add support for chunk-based files
@ 2021-09-22 18:56 Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao

v1 & 2: https://lore.kernel.org/r/20210818070316.1970-2-hsiangkao@linux.alibaba.com

changes since v2:
 - add erofsfuse support for chunk-based files;
 - add support for 4-byte blockmap array in addition to chunk indexes;
 - update manpages;
 - minor cleanups.

Gao Xiang (5):
  erofs-utils: fuse: support reading chunk-based uncompressed files
  erofs-utils: introduce hashmap from git source
  erofs-utils: introduce sha256
  erofs-utils: introduce copy_file_range
  erofs-utils: mkfs: support chunk-based uncompressed files

 configure.ac               |   1 +
 include/erofs/blobchunk.h  |  18 +++
 include/erofs/config.h     |   1 +
 include/erofs/defs.h       |  77 ++++++++++
 include/erofs/flex-array.h | 147 +++++++++++++++++++
 include/erofs/hashmap.h    | 103 ++++++++++++++
 include/erofs/hashtable.h  |  77 ----------
 include/erofs/internal.h   |   6 +
 include/erofs/io.h         |   7 +
 include/erofs_fs.h         |  48 ++++++-
 lib/Makefile.am            |   3 +-
 lib/blobchunk.c            | 217 ++++++++++++++++++++++++++++
 lib/data.c                 |  86 +++++++++--
 lib/hashmap.c              | 284 +++++++++++++++++++++++++++++++++++++
 lib/inode.c                |  36 ++++-
 lib/io.c                   |  97 ++++++++++++-
 lib/namei.c                |  15 +-
 lib/sha256.c               | 248 ++++++++++++++++++++++++++++++++
 man/mkfs.erofs.1           |   3 +
 mkfs/main.c                |  38 +++++
 20 files changed, 1413 insertions(+), 99 deletions(-)
 create mode 100644 include/erofs/blobchunk.h
 create mode 100644 include/erofs/flex-array.h
 create mode 100644 include/erofs/hashmap.h
 create mode 100644 lib/blobchunk.c
 create mode 100644 lib/hashmap.c
 create mode 100644 lib/sha256.c

-- 
2.24.4


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files
  2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 2/5] erofs-utils: introduce hashmap from git source Gao Xiang
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao

Keep in sync with the latest kernel
commit 2a9dc7a8fec6 ("erofs: introduce chunk-based file on-disk format")
and
commit c5aa903a59db ("erofs: support reading chunk-based uncompressed files")

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/internal.h |  5 +++
 include/erofs_fs.h       | 48 ++++++++++++++++++++--
 lib/data.c               | 86 +++++++++++++++++++++++++++++++++++-----
 lib/namei.c              | 15 ++++++-
 4 files changed, 140 insertions(+), 14 deletions(-)

diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index f5eacea5d4d7..8621f3426410 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -109,6 +109,7 @@ static inline void erofs_sb_clear_##name(void) \
 EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
 EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
 EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
+EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
 
 #define EROFS_I_EA_INITED	(1 << 0)
@@ -140,6 +141,10 @@ struct erofs_inode {
 		u32 i_blkaddr;
 		u32 i_blocks;
 		u32 i_rdev;
+		struct {
+			unsigned short	chunkformat;
+			unsigned char	chunkbits;
+		};
 	} u;
 
 	char i_srcpath[PATH_MAX + 1];
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 48934bb76cec..66a68e3b2065 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -4,7 +4,7 @@
  *
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
+ * Copyright (C) 2021, Alibaba Cloud
  */
 #ifndef __EROFS_FS_H
 #define __EROFS_FS_H
@@ -21,10 +21,12 @@
 #define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING	0x00000001
 #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS	0x00000002
 #define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER	0x00000002
+#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE	0x00000004
 #define EROFS_ALL_FEATURE_INCOMPAT		\
 	(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
-	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
+	 EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
+	 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE)
 
 #define EROFS_SB_EXTSLOT_SIZE	16
 
@@ -66,13 +68,16 @@ struct erofs_super_block {
  * inode, [xattrs], last_inline_data, ... | ... | no-holed data
  * 3 - inode compression D:
  * inode, [xattrs], map_header, extents ... | ...
- * 4~7 - reserved
+ * 4 - inode chunk-based E:
+ * inode, [xattrs], chunk indexes ... | ...
+ * 5~7 - reserved
  */
 enum {
 	EROFS_INODE_FLAT_PLAIN			= 0,
 	EROFS_INODE_FLAT_COMPRESSION_LEGACY	= 1,
 	EROFS_INODE_FLAT_INLINE			= 2,
 	EROFS_INODE_FLAT_COMPRESSION		= 3,
+	EROFS_INODE_CHUNK_BASED			= 4,
 	EROFS_INODE_DATALAYOUT_MAX
 };
 
@@ -92,6 +97,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
 #define EROFS_I_ALL	\
 	((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
 
+/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */
+#define EROFS_CHUNK_FORMAT_BLKBITS_MASK		0x001F
+/* with chunk indexes or just a 4-byte blkaddr array */
+#define EROFS_CHUNK_FORMAT_INDEXES		0x0020
+
+#define EROFS_CHUNK_FORMAT_ALL	\
+	(EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
+
+struct erofs_inode_chunk_info {
+	__le16 format;		/* chunk blkbits, etc. */
+	__le16 reserved;
+};
+
 /* 32-byte reduced form of an ondisk inode */
 struct erofs_inode_compact {
 	__le16 i_format;	/* inode format hints */
@@ -109,6 +127,9 @@ struct erofs_inode_compact {
 
 		/* for device files, used to indicate old/new device # */
 		__le32 rdev;
+
+		/* for chunk-based files, it contains the summary info */
+		struct erofs_inode_chunk_info c;
 	} i_u;
 	__le32 i_ino;           /* only used for 32-bit stat compatibility */
 	__le16 i_uid;
@@ -137,6 +158,9 @@ struct erofs_inode_extended {
 
 		/* for device files, used to indicate old/new device # */
 		__le32 rdev;
+
+		/* for chunk-based files, it contains the summary info */
+		struct erofs_inode_chunk_info c;
 	} i_u;
 
 	/* only used for 32-bit stat compatibility */
@@ -206,6 +230,19 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
 				 e->e_name_len + le16_to_cpu(e->e_value_size));
 }
 
+/* represent a zeroed chunk (hole) */
+#define EROFS_NULL_ADDR			-1
+
+/* 4-byte block address array */
+#define EROFS_BLOCK_MAP_ENTRY_SIZE	sizeof(__le32)
+
+/* 8-byte inode chunk indexes */
+struct erofs_inode_chunk_index {
+	__le16 advise;		/* always 0, don't care for now */
+	__le16 device_id;	/* back-end storage id, always 0 for now */
+	__le32 blkaddr;		/* start block address of this inode chunk */
+};
+
 /* maximum supported size of a physical compression cluster */
 #define Z_EROFS_PCLUSTER_MAX_SIZE	(1024 * 1024)
 
@@ -350,9 +387,14 @@ static inline void erofs_check_ondisk_layout_definitions(void)
 	BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
 	BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
 	BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
 	BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
 	BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
 	BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
+	/* keep in sync between 2 index structures for better extendibility */
+	BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
+		     sizeof(struct z_erofs_vle_decompressed_index));
 
 	BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
 		     Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
diff --git a/lib/data.c b/lib/data.c
index 1a1005a67350..641d8408b54f 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -25,13 +25,6 @@ static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
 	nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
 	lastblk = nblocks - tailendpacking;
 
-	if (offset >= inode->i_size) {
-		/* leave out-of-bound access unmapped */
-		map->m_flags = 0;
-		map->m_plen = 0;
-		goto out;
-	}
-
 	/* there is no hole in flatmode */
 	map->m_flags = EROFS_MAP_MAPPED;
 
@@ -62,14 +55,86 @@ static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
 		goto err_out;
 	}
 
-out:
 	map->m_llen = map->m_plen;
-
 err_out:
 	trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
 	return err;
 }
 
+/*
+ * Map the logical offset map->m_la of @inode to an on-disk extent; holes and
+ * out-of-bound offsets are left unmapped (m_flags == 0).  Non-chunk-based
+ * inodes are handed to erofs_map_blocks_flatmode().  Returns 0 or -errno.
+ */
+static int erofs_map_blocks(struct erofs_inode *inode,
+			    struct erofs_map_blocks *map, int flags)
+{
+	struct erofs_inode *vi = inode;
+	struct erofs_inode_chunk_index *idx;
+	u8 buf[EROFS_BLKSIZ];
+	u64 chunknr;
+	unsigned int unit;
+	erofs_off_t pos;
+	int err = 0;
+
+	if (map->m_la >= inode->i_size) {
+		/* leave out-of-bound access unmapped */
+		map->m_flags = 0;
+		map->m_plen = 0;
+		goto out;
+	}
+
+	if (vi->datalayout != EROFS_INODE_CHUNK_BASED)
+		return erofs_map_blocks_flatmode(inode, map, flags);
+
+	/* entries are 8-byte chunk indexes or 4-byte block addresses */
+	unit = (vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) ?
+		sizeof(*idx) : EROFS_BLOCK_MAP_ENTRY_SIZE;
+
+	chunknr = map->m_la >> vi->u.chunkbits;
+	pos = roundup(iloc(vi->nid) + vi->inode_isize +
+		      vi->xattr_isize, unit) + unit * chunknr;
+
+	err = blk_read(buf, erofs_blknr(pos), 1);
+	if (err < 0)
+		return -EIO;
+
+	/* chunkbits may exceed 31, so the shift must be done in 64 bits */
+	map->m_la = chunknr << vi->u.chunkbits;
+	map->m_plen = min_t(erofs_off_t, 1ULL << vi->u.chunkbits,
+			    roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
+
+	/* handle block map */
+	if (!(vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
+		__le32 *blkaddr = (void *)buf + erofs_blkoff(pos);
+
+		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
+			map->m_flags = 0;
+		} else {
+			map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
+			map->m_flags = EROFS_MAP_MAPPED;
+		}
+		goto out;
+	}
+	/* parse chunk indexes */
+	idx = (void *)buf + erofs_blkoff(pos);
+	if (le32_to_cpu(idx->blkaddr) == EROFS_NULL_ADDR) {
+		map->m_flags = 0;
+	} else if (idx->device_id) {
+		/* only one device is supported for now */
+		erofs_err("invalid device id %u @ %" PRIu64 " for nid %llu",
+			  le16_to_cpu(idx->device_id),
+			  chunknr, vi->nid | 0ULL);
+		err = -EFSCORRUPTED;
+	} else {
+		map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
+		map->m_flags = EROFS_MAP_MAPPED;
+	}
+out:
+	map->m_llen = map->m_plen;
+	return err;
+}
+
 static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
 			       erofs_off_t size, erofs_off_t offset)
 {
@@ -84,7 +149,7 @@ static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
 		erofs_off_t eend;
 
 		map.m_la = ptr;
-		ret = erofs_map_blocks_flatmode(inode, &map, 0);
+		ret = erofs_map_blocks(inode, &map, 0);
 		if (ret)
 			return ret;
 
@@ -206,6 +271,7 @@ int erofs_pread(struct erofs_inode *inode, char *buf,
 	switch (inode->datalayout) {
 	case EROFS_INODE_FLAT_PLAIN:
 	case EROFS_INODE_FLAT_INLINE:
+	case EROFS_INODE_CHUNK_BASED:
 		return erofs_read_raw_data(inode, buf, count, offset);
 	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
 	case EROFS_INODE_FLAT_COMPRESSION:
diff --git a/lib/namei.c b/lib/namei.c
index f96e400c36b0..b4bdabf10acb 100644
--- a/lib/namei.c
+++ b/lib/namei.c
@@ -82,6 +82,9 @@ static int erofs_read_inode_from_disk(struct erofs_inode *vi)
 		vi->i_ctime = le64_to_cpu(die->i_ctime);
 		vi->i_ctime_nsec = le64_to_cpu(die->i_ctime_nsec);
 		vi->i_size = le64_to_cpu(die->i_size);
+		if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+			/* fill chunked inode summary info */
+			vi->u.chunkformat = le16_to_cpu(die->i_u.c.format);
 		break;
 	case EROFS_INODE_LAYOUT_COMPACT:
 		vi->inode_isize = sizeof(struct erofs_inode_compact);
@@ -115,6 +118,8 @@ static int erofs_read_inode_from_disk(struct erofs_inode *vi)
 		vi->i_ctime_nsec = sbi.build_time_nsec;
 
 		vi->i_size = le32_to_cpu(dic->i_size);
+		if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+			vi->u.chunkformat = le16_to_cpu(dic->i_u.c.format);
 		break;
 	default:
 		erofs_err("unsupported on-disk inode version %u of nid %llu",
@@ -123,7 +128,15 @@ static int erofs_read_inode_from_disk(struct erofs_inode *vi)
 	}
 
 	vi->flags = 0;
-	if (erofs_inode_is_data_compressed(vi->datalayout))
+	if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+		if (vi->u.chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
+			erofs_err("unsupported chunk format %x of nid %llu",
+				  vi->u.chunkformat, vi->nid | 0ULL);
+			return -EOPNOTSUPP;
+		}
+		vi->u.chunkbits = LOG_BLOCK_SIZE +
+			(vi->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
+	} else if (erofs_inode_is_data_compressed(vi->datalayout))
 		z_erofs_fill_inode(vi);
 	return 0;
 bogusimode:
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 2/5] erofs-utils: introduce hashmap from git source
  2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 3/5] erofs-utils: introduce sha256 Gao Xiang
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao

Copied from git source (it's already workable).

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/flex-array.h | 147 +++++++++++++++++++
 include/erofs/hashmap.h    | 103 ++++++++++++++
 lib/Makefile.am            |   3 +-
 lib/hashmap.c              | 284 +++++++++++++++++++++++++++++++++++++
 4 files changed, 536 insertions(+), 1 deletion(-)
 create mode 100644 include/erofs/flex-array.h
 create mode 100644 include/erofs/hashmap.h
 create mode 100644 lib/hashmap.c

diff --git a/include/erofs/flex-array.h b/include/erofs/flex-array.h
new file mode 100644
index 000000000000..59168d05ee5a
--- /dev/null
+++ b/include/erofs/flex-array.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EROFS_FLEX_ARRAY_H
+#define __EROFS_FLEX_ARRAY_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "defs.h"
+#include "print.h"
+
+/*
+ * flex-array.h
+ *
+ * Some notes to make sense of the code.
+ *
+ * Flex-arrays:
+ *   - Flex-arrays became standard in C99 and are defined by "array[]" (at the
+ *     end of a struct)
+ *   - Pre-C99 flex-arrays can be accomplished by "array[1]"
+ *   - There is a GNU extension where they are defined using "array[0]"
+ *     Allegedly there is/was a bug in gcc whereby foo[1] generated incorrect
+ *     code, so it's safest to use [0] (https://lkml.org/lkml/2015/2/18/407).
+ *
+ * For C89 and C90, __STDC__ is 1
+ * For later standards, __STDC_VERSION__ is defined according to the standard.
+ * For example: 199901L or 201112L
+ *
+ * Whilst we're on the subject, in version 5 of gcc, the default std was
+ * changed from gnu89 to gnu11. In jgmenu, CFLAGS therefore contains -std=gnu89
+ * You can check your default gcc std by doing:
+ * gcc -dM -E - </dev/null | grep '__STDC_VERSION__\|__STDC__'
+ *
+ * The code below is copied from git's git-compat-util.h in support of
+ * hashmap.c
+ */
+
+#ifndef FLEX_ARRAY
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+	(!defined(__SUNPRO_C) || (__SUNPRO_C > 0x580))
+# define FLEX_ARRAY /* empty */
+#elif defined(__GNUC__)
+# if (__GNUC__ >= 3)
+#  define FLEX_ARRAY /* empty */
+# else
+#  define FLEX_ARRAY 0 /* older GNU extension */
+# endif
+#endif
+
+/* Otherwise, default to safer but a bit wasteful traditional style */
+#ifndef FLEX_ARRAY
+# define FLEX_ARRAY 1
+#endif
+#endif
+
+#define bitsizeof(x) (CHAR_BIT * sizeof(x))
+
+#define maximum_signed_value_of_type(a) \
+	(INTMAX_MAX >> (bitsizeof(intmax_t) - bitsizeof(a)))
+
+#define maximum_unsigned_value_of_type(a) \
+	(UINTMAX_MAX >> (bitsizeof(uintmax_t) - bitsizeof(a)))
+
+/*
+ * Signed integer overflow is undefined in C, so here's a helper macro
+ * to detect if the sum of two integers will overflow.
+ * Requires: a >= 0, typeof(a) equals typeof(b)
+ */
+#define signed_add_overflows(a, b) \
+	((b) > maximum_signed_value_of_type(a) - (a))
+
+#define unsigned_add_overflows(a, b) \
+	((b) > maximum_unsigned_value_of_type(a) - (a))
+
+static inline size_t st_add(size_t a, size_t b)
+{
+	if (unsigned_add_overflows(a, b)) {
+		erofs_err("size_t overflow: %llu + %llu", a | 0ULL, b | 0ULL);
+		BUG_ON(1);
+		return -1;
+	}
+	return a + b;
+}
+
+#define st_add3(a, b, c) st_add(st_add((a), (b)), (c))
+#define st_add4(a, b, c, d) st_add(st_add3((a), (b), (c)), (d))
+
+/*
+ * These functions help you allocate structs with flex arrays, and copy
+ * the data directly into the array. For example, if you had:
+ *
+ *   struct foo {
+ *     int bar;
+ *     char name[FLEX_ARRAY];
+ *   };
+ *
+ * you can do:
+ *
+ *   struct foo *f;
+ *   FLEX_ALLOC_MEM(f, name, src, len);
+ *
+ * to allocate a "foo" with the contents of "src" in the "name" field.
+ * The resulting struct is automatically zero'd, and the flex-array field
+ * is NUL-terminated (whether the incoming src buffer was or not).
+ *
+ * The FLEXPTR_* variants operate on structs that don't use flex-arrays,
+ * but do want to store a pointer to some extra data in the same allocated
+ * block. For example, if you have:
+ *
+ *   struct foo {
+ *     char *name;
+ *     int bar;
+ *   };
+ *
+ * you can do:
+ *
+ *   struct foo *f;
+ *   FLEXPTR_ALLOC_STR(f, name, src);
+ *
+ * and "name" will point to a block of memory after the struct, which will be
+ * freed along with the struct (but the pointer can be repointed anywhere).
+ *
+ * The *_STR variants accept a string parameter rather than a ptr/len
+ * combination.
+ *
+ * Note that these macros will evaluate the first parameter multiple
+ * times, and it must be assignable as an lvalue.
+ */
+#define FLEX_ALLOC_MEM(x, flexname, buf, len) do { \
+	size_t flex_array_len_ = (len); \
+	(x) = calloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); \
+	BUG_ON(!(x)); \
+	memcpy((void *)(x)->flexname, (buf), flex_array_len_); \
+} while (0)
+#define FLEXPTR_ALLOC_MEM(x, ptrname, buf, len) do { \
+	size_t flex_array_len_ = (len); \
+	(x) = calloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); \
+	BUG_ON(!(x)); /* no xcalloc() here, so check like FLEX_ALLOC_MEM */ \
+	(x)->ptrname = memcpy((x) + 1, (buf), flex_array_len_); \
+} while (0)
+#define FLEX_ALLOC_STR(x, flexname, str) \
+	FLEX_ALLOC_MEM((x), flexname, (str), strlen(str))
+#define FLEXPTR_ALLOC_STR(x, ptrname, str) \
+	FLEXPTR_ALLOC_MEM((x), ptrname, (str), strlen(str))
+
+#endif
diff --git a/include/erofs/hashmap.h b/include/erofs/hashmap.h
new file mode 100644
index 000000000000..024a14e497d4
--- /dev/null
+++ b/include/erofs/hashmap.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EROFS_HASHMAP_H
+#define __EROFS_HASHMAP_H
+
+/* Copied from https://github.com/git/git.git */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "flex-array.h"
+
+/*
+ * Generic implementation of hash-based key-value mappings.
+ * See Documentation/technical/api-hashmap.txt in the git source tree.
+ */
+
+/* FNV-1 functions */
+unsigned int strhash(const char *str);
+unsigned int strihash(const char *str);
+unsigned int memhash(const void *buf, size_t len);
+unsigned int memihash(const void *buf, size_t len);
+
+static inline unsigned int sha1hash(const unsigned char *sha1)
+{
+	/*
+	 * Equivalent to 'return *(unsigned int *)sha1;', but safe on
+	 * platforms that don't support unaligned reads.
+	 */
+	unsigned int hash;
+
+	memcpy(&hash, sha1, sizeof(hash));
+	return hash;
+}
+
+/* data structures */
+struct hashmap_entry {
+	struct hashmap_entry *next;
+	unsigned int hash;
+};
+
+typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key,
+		const void *keydata);
+
+struct hashmap {
+	struct hashmap_entry **table;
+	hashmap_cmp_fn cmpfn;
+	unsigned int size, tablesize, grow_at, shrink_at;
+};
+
+struct hashmap_iter {
+	struct hashmap *map;
+	struct hashmap_entry *next;
+	unsigned int tablepos;
+};
+
+/* hashmap functions */
+void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
+		  size_t initial_size);
+void hashmap_free(struct hashmap *map, int free_entries);
+
+/* hashmap_entry functions */
+static inline void hashmap_entry_init(void *entry, unsigned int hash)
+{
+	struct hashmap_entry *e = entry;
+
+	e->hash = hash;
+	e->next = NULL;
+}
+
+void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata);
+void *hashmap_get_next(const struct hashmap *map, const void *entry);
+void hashmap_add(struct hashmap *map, void *entry);
+void *hashmap_put(struct hashmap *map, void *entry);
+void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata);
+
+static inline void *hashmap_get_from_hash(const struct hashmap *map,
+					  unsigned int hash,
+					  const void *keydata)
+{
+	struct hashmap_entry key;
+
+	hashmap_entry_init(&key, hash);
+	return hashmap_get(map, &key, keydata);
+}
+
+/* hashmap_iter functions */
+void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter);
+void *hashmap_iter_next(struct hashmap_iter *iter);
+static inline void *hashmap_iter_first(struct hashmap *map,
+				       struct hashmap_iter *iter)
+{
+	hashmap_iter_init(map, iter);
+	return hashmap_iter_next(iter);
+}
+
+/* string interning */
+const void *memintern(const void *data, size_t len);
+static inline const char *strintern(const char *string)
+{
+	return memintern(string, strlen(string));
+}
+
+#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 5a33e297c194..7d00bf5fafdc 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -21,7 +21,8 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
 
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
-		      namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
+		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
+		      compress_hints.c hashmap.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/hashmap.c b/lib/hashmap.c
new file mode 100644
index 000000000000..e11bd8da94c1
--- /dev/null
+++ b/lib/hashmap.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from https://github.com/git/git.git
+ * Generic implementation of hash-based key value mappings.
+ */
+#include "erofs/hashmap.h"
+
+#define FNV32_BASE ((unsigned int)0x811c9dc5)
+#define FNV32_PRIME ((unsigned int)0x01000193)
+
+unsigned int strhash(const char *str)
+{
+	unsigned int c, hash = FNV32_BASE;
+
+	while ((c = (unsigned char)*str++))
+		hash = (hash * FNV32_PRIME) ^ c;
+	return hash;
+}
+
+unsigned int strihash(const char *str)
+{
+	unsigned int c, hash = FNV32_BASE;
+
+	while ((c = (unsigned char)*str++)) {
+		if (c >= 'a' && c <= 'z')
+			c -= 'a' - 'A';
+		hash = (hash * FNV32_PRIME) ^ c;
+	}
+	return hash;
+}
+
+unsigned int memhash(const void *buf, size_t len)
+{
+	unsigned int hash = FNV32_BASE;
+	unsigned char *ucbuf = (unsigned char *)buf;
+
+	while (len--) {
+		unsigned int c = *ucbuf++;
+
+		hash = (hash * FNV32_PRIME) ^ c;
+	}
+	return hash;
+}
+
+unsigned int memihash(const void *buf, size_t len)
+{
+	unsigned int hash = FNV32_BASE;
+	unsigned char *ucbuf = (unsigned char *)buf;
+
+	while (len--) {
+		unsigned int c = *ucbuf++;
+
+		if (c >= 'a' && c <= 'z')
+			c -= 'a' - 'A';
+		hash = (hash * FNV32_PRIME) ^ c;
+	}
+	return hash;
+}
+
+#define HASHMAP_INITIAL_SIZE 64
+/* grow / shrink by 2^2 */
+#define HASHMAP_RESIZE_BITS 2
+/* load factor in percent */
+#define HASHMAP_LOAD_FACTOR 80
+
+static void alloc_table(struct hashmap *map, unsigned int size)
+{
+	map->tablesize = size;
+	map->table = calloc(size, sizeof(struct hashmap_entry *));
+	BUG_ON(!map->table);
+
+	/* calculate resize thresholds for new size */
+	map->grow_at = (unsigned int)((uint64_t)size * HASHMAP_LOAD_FACTOR / 100);
+	if (size <= HASHMAP_INITIAL_SIZE)
+		map->shrink_at = 0;
+	else
+		/*
+		 * The shrink-threshold must be slightly smaller than
+		 * (grow-threshold / resize-factor) to prevent erratic resizing,
+		 * thus we divide by (resize-factor + 1).
+		 */
+		map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1);
+}
+
+static inline int entry_equals(const struct hashmap *map,
+			       const struct hashmap_entry *e1,
+			       const struct hashmap_entry *e2,
+			       const void *keydata)
+{
+	return (e1 == e2) || (e1->hash == e2->hash && !map->cmpfn(e1, e2, keydata));
+}
+
+static inline unsigned int bucket(const struct hashmap *map,
+				  const struct hashmap_entry *key)
+{
+	return key->hash & (map->tablesize - 1);
+}
+
+static void rehash(struct hashmap *map, unsigned int newsize)
+{
+	unsigned int i, oldsize = map->tablesize;
+	struct hashmap_entry **oldtable = map->table;
+
+	alloc_table(map, newsize);
+	for (i = 0; i < oldsize; i++) {
+		struct hashmap_entry *e = oldtable[i];
+
+		while (e) {
+			struct hashmap_entry *next = e->next;
+			unsigned int b = bucket(map, e);
+
+			e->next = map->table[b];
+			map->table[b] = e;
+			e = next;
+		}
+	}
+	free(oldtable);
+}
+
+static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map,
+						    const struct hashmap_entry *key,
+						    const void *keydata)
+{
+	struct hashmap_entry **e = &map->table[bucket(map, key)];
+
+	while (*e && !entry_equals(map, *e, key, keydata))
+		e = &(*e)->next;
+	return e;
+}
+
+static int always_equal(const void *unused1, const void *unused2, const void *unused3)
+{
+	return 0;
+}
+
+void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
+		  size_t initial_size)
+{
+	unsigned int size = HASHMAP_INITIAL_SIZE;
+
+	map->size = 0;
+	map->cmpfn = equals_function ? equals_function : always_equal;
+
+	/* calculate initial table size and allocate the table */
+	initial_size = (unsigned int)((uint64_t)initial_size * 100
+			/ HASHMAP_LOAD_FACTOR);
+	while (initial_size > size)
+		size <<= HASHMAP_RESIZE_BITS;
+	alloc_table(map, size);
+}
+
+void hashmap_free(struct hashmap *map, int free_entries)
+{
+	if (!map || !map->table)
+		return;
+	if (free_entries) {
+		struct hashmap_iter iter;
+		struct hashmap_entry *e;
+
+		hashmap_iter_init(map, &iter);
+		while ((e = hashmap_iter_next(&iter)))
+			free(e);
+	}
+	free(map->table);
+	memset(map, 0, sizeof(*map));
+}
+
+void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)
+{
+	return *find_entry_ptr(map, key, keydata);
+}
+
+void *hashmap_get_next(const struct hashmap *map, const void *entry)
+{
+	struct hashmap_entry *e = ((struct hashmap_entry *)entry)->next;
+
+	for (; e; e = e->next)
+		if (entry_equals(map, entry, e, NULL))
+			return e;
+	return NULL;
+}
+
+void hashmap_add(struct hashmap *map, void *entry)
+{
+	unsigned int b = bucket(map, entry);
+
+	/* add entry */
+	((struct hashmap_entry *)entry)->next = map->table[b];
+	map->table[b] = entry;
+
+	/* fix size and rehash if appropriate */
+	map->size++;
+	if (map->size > map->grow_at)
+		rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
+}
+
+void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
+{
+	struct hashmap_entry *old;
+	struct hashmap_entry **e = find_entry_ptr(map, key, keydata);
+
+	if (!*e)
+		return NULL;
+
+	/* remove existing entry */
+	old = *e;
+	*e = old->next;
+	old->next = NULL;
+
+	/* fix size and rehash if appropriate */
+	map->size--;
+	if (map->size < map->shrink_at)
+		rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
+	return old;
+}
+
+void *hashmap_put(struct hashmap *map, void *entry)
+{
+	struct hashmap_entry *old = hashmap_remove(map, entry, NULL);
+
+	hashmap_add(map, entry);
+	return old;
+}
+
+void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)
+{
+	iter->map = map;
+	iter->tablepos = 0;
+	iter->next = NULL;
+}
+
+void *hashmap_iter_next(struct hashmap_iter *iter)
+{
+	struct hashmap_entry *current = iter->next;
+
+	for (;;) {
+		if (current) {
+			iter->next = current->next;
+			return current;
+		}
+
+		if (iter->tablepos >= iter->map->tablesize)
+			return NULL;
+
+		current = iter->map->table[iter->tablepos++];
+	}
+}
+
+struct pool_entry {
+	struct hashmap_entry ent;
+	size_t len;
+	unsigned char data[FLEX_ARRAY];
+};
+
+static int pool_entry_cmp(const struct pool_entry *e1,
+			  const struct pool_entry *e2,
+			  const unsigned char *keydata)
+{
+	return e1->data != keydata &&
+	       (e1->len != e2->len || memcmp(e1->data, keydata, e1->len));
+}
+
+const void *memintern(const void *data, size_t len)
+{
+	static struct hashmap map;
+	struct pool_entry key, *e;
+
+	/* initialize string pool hashmap */
+	if (!map.tablesize)
+		hashmap_init(&map, (hashmap_cmp_fn)pool_entry_cmp, 0);
+
+	/* lookup interned string in pool */
+	hashmap_entry_init(&key, memhash(data, len));
+	key.len = len;
+	e = hashmap_get(&map, &key, data);
+	if (!e) {
+		/* not found: create it */
+		FLEX_ALLOC_MEM(e, data, data, len);
+		hashmap_entry_init(e, key.ent.hash);
+		e->len = len;
+		hashmap_add(&map, e);
+	}
+	return e->data;
+}
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 3/5] erofs-utils: introduce sha256
  2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 2/5] erofs-utils: introduce hashmap from git source Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 4/5] erofs-utils: introduce copy_file_range Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
  4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao

A simple sha256 approach copied from e2fsprogs.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/Makefile.am |   2 +-
 lib/sha256.c    | 248 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 249 insertions(+), 1 deletion(-)
 create mode 100644 lib/sha256.c

diff --git a/lib/Makefile.am b/lib/Makefile.am
index 7d00bf5fafdc..2638a109c29c 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -22,7 +22,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
-		      compress_hints.c hashmap.c
+		      compress_hints.c hashmap.c sha256.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/sha256.c b/lib/sha256.c
new file mode 100644
index 000000000000..dd0e058662ff
--- /dev/null
+++ b/lib/sha256.c
@@ -0,0 +1,248 @@
+/*
+ * sha256.c --- The sha256 algorithm
+ *
+ * Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
+ * (copied from libtomcrypt and then relicensed under GPLv2)
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+#include "erofs/defs.h"
+#include <string.h>
+
+static const __u32 K[64] = {
+    0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
+    0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
+    0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
+    0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+    0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL,
+    0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL,
+    0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL,
+    0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+    0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL,
+    0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL,
+    0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL,
+    0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+    0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+};
+
+/* Various logical functions */
+#define Ch(x,y,z)       (z ^ (x & (y ^ z)))
+#define Maj(x,y,z)      (((x | y) & z) | (x & y))
+#define S(x, n)         RORc((x),(n))
+#define R(x, n)         (((x)&0xFFFFFFFFUL)>>(n))
+#define Sigma0(x)       (S(x, 2) ^ S(x, 13) ^ S(x, 22))
+#define Sigma1(x)       (S(x, 6) ^ S(x, 11) ^ S(x, 25))
+#define Gamma0(x)       (S(x, 7) ^ S(x, 18) ^ R(x, 3))
+#define Gamma1(x)       (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+#define RORc(x, y) ( ((((__u32)(x)&0xFFFFFFFFUL)>>(__u32)((y)&31)) | ((__u32)(x)<<(__u32)(32-((y)&31)))) & 0xFFFFFFFFUL)
+
+#define RND(a,b,c,d,e,f,g,h,i)                         \
+     t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i];   \
+     t1 = Sigma0(a) + Maj(a, b, c);                    \
+     d += t0;                                          \
+     h  = t0 + t1;
+
+#define STORE64H(x, y) \
+	do { \
+		(y)[0] = (unsigned char)(((x)>>56)&255);\
+		(y)[1] = (unsigned char)(((x)>>48)&255);\
+		(y)[2] = (unsigned char)(((x)>>40)&255);\
+		(y)[3] = (unsigned char)(((x)>>32)&255);\
+		(y)[4] = (unsigned char)(((x)>>24)&255);\
+		(y)[5] = (unsigned char)(((x)>>16)&255);\
+		(y)[6] = (unsigned char)(((x)>>8)&255);\
+		(y)[7] = (unsigned char)((x)&255); } while(0)
+
+#define STORE32H(x, y)                                                                     \
+  do { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255);   \
+       (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } while(0)
+
+#define LOAD32H(x, y)                            \
+  do { x = ((__u32)((y)[0] & 255)<<24) | \
+           ((__u32)((y)[1] & 255)<<16) | \
+           ((__u32)((y)[2] & 255)<<8)  | \
+           ((__u32)((y)[3] & 255)); } while(0)
+
+struct sha256_state {
+    __u64 length;
+    __u32 state[8], curlen;
+    unsigned char buf[64];
+};
+
+/* This is a highly simplified version from libtomcrypt */
+struct hash_state {
+	struct sha256_state sha256;
+};
+
+static void sha256_compress(struct hash_state * md, const unsigned char *buf)
+{
+    __u32 S[8], W[64], t0, t1;
+    __u32 t;
+    int i;
+
+    /* work on a local copy S[] of the eight chaining words */
+    for (i = 0; i < 8; i++) {
+        S[i] = md->sha256.state[i];
+    }
+
+    /* load the 64-byte block as 16 big-endian 32-bit words into W[0..15] */
+    for (i = 0; i < 16; i++) {
+        LOAD32H(W[i], buf + (4*i));
+    }
+
+    /* expand the message schedule into W[16..63] */
+    for (i = 16; i < 64; i++) {
+        W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
+    }
+
+    /* 64 rounds; S[] is rotated after each so RND always sees a..h in S[0..7] */
+     for (i = 0; i < 64; ++i) {
+         RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i);
+         t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
+         S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
+     }
+
+    /* feedback: add the working words back into the chaining state */
+    for (i = 0; i < 8; i++) {
+        md->sha256.state[i] = md->sha256.state[i] + S[i];
+    }
+}
+
+static void sha256_init(struct hash_state * md)
+{
+    md->sha256.curlen = 0;
+    md->sha256.length = 0;
+    md->sha256.state[0] = 0x6A09E667UL;
+    md->sha256.state[1] = 0xBB67AE85UL;
+    md->sha256.state[2] = 0x3C6EF372UL;
+    md->sha256.state[3] = 0xA54FF53AUL;
+    md->sha256.state[4] = 0x510E527FUL;
+    md->sha256.state[5] = 0x9B05688CUL;
+    md->sha256.state[6] = 0x1F83D9ABUL;
+    md->sha256.state[7] = 0x5BE0CD19UL;
+}
+
+#define MIN(x, y) ( ((x)<(y))?(x):(y) )
+#define SHA256_BLOCKSIZE 64
+static void sha256_process(struct hash_state * md, const unsigned char *in, unsigned long inlen)
+{
+    unsigned long n;
+
+    while (inlen > 0) {
+	    if (md->sha256.curlen == 0 && inlen >= SHA256_BLOCKSIZE) {
+		    sha256_compress(md, in);
+		    md->sha256.length += SHA256_BLOCKSIZE * 8;
+		    in += SHA256_BLOCKSIZE;
+		    inlen -= SHA256_BLOCKSIZE;
+	    } else {
+		    n = MIN(inlen, (SHA256_BLOCKSIZE - md->sha256.curlen));
+		    memcpy(md->sha256.buf + md->sha256.curlen, in, (size_t)n);
+		    md->sha256.curlen += n;
+		    in += n;
+		    inlen -= n;
+		    if (md->sha256.curlen == SHA256_BLOCKSIZE) {
+			    sha256_compress(md, md->sha256.buf);
+			    md->sha256.length += 8*SHA256_BLOCKSIZE;
+			    md->sha256.curlen = 0;
+		    }
+	    }
+    }
+}
+
+static void sha256_done(struct hash_state * md, unsigned char *out)
+{
+    int i;
+
+    /* account for the still-buffered tail; length is kept in bits */
+    md->sha256.length += md->sha256.curlen * 8;
+
+    /* append the '1' bit (as the byte 0x80) */
+    md->sha256.buf[md->sha256.curlen++] = (unsigned char)0x80;
+
+    /* if more than 56 bytes are buffered there is no room left for the
+     * 8-byte length: zero-fill and compress this block first, then pad
+     * a fresh block and encode the length as usual.
+     */
+    if (md->sha256.curlen > 56) {
+        while (md->sha256.curlen < 64) {
+            md->sha256.buf[md->sha256.curlen++] = (unsigned char)0;
+        }
+        sha256_compress(md, md->sha256.buf);
+        md->sha256.curlen = 0;
+    }
+
+    /* zero-pad up to byte offset 56 */
+    while (md->sha256.curlen < 56) {
+        md->sha256.buf[md->sha256.curlen++] = (unsigned char)0;
+    }
+
+    /* store the 64-bit bit length, big-endian, in the last 8 bytes */
+    STORE64H(md->sha256.length, md->sha256.buf+56);
+    sha256_compress(md, md->sha256.buf);
+
+    /* emit the eight state words big-endian: 32 bytes written to out */
+    for (i = 0; i < 8; i++) {
+        STORE32H(md->sha256.state[i], out+(4*i));
+    }
+}
+
+void erofs_sha256(const unsigned char *in, unsigned long in_size,
+		  unsigned char out[32])
+{
+	struct hash_state md;
+
+	sha256_init(&md);
+	sha256_process(&md, in, in_size);
+	sha256_done(&md, out);
+}
+
+#ifdef UNITTEST
+static const struct {
+	char *msg;
+	unsigned char hash[32];
+} tests[] = {
+	{ "",
+	  { 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
+	    0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
+	    0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
+	    0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 }
+	},
+	{ "abc",
+	  { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea,
+	    0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23,
+	    0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c,
+	    0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad }
+	},
+	{ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+	  { 0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8,
+	    0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e, 0x60, 0x39,
+	    0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67,
+	    0xf6, 0xec, 0xed, 0xd4, 0x19, 0xdb, 0x06, 0xc1 }
+	},
+};
+
+int main(int argc, char **argv)
+{
+	int i;
+	int errors = 0;
+	unsigned char tmp[32];
+
+	for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) {
+		unsigned char *msg = (unsigned char *) tests[i].msg;
+		int len = strlen(tests[i].msg);
+
+		erofs_sha256(msg, len, tmp);
+		printf("SHA256 test message %d: ", i);
+		if (memcmp(tmp, tests[i].hash, 32) != 0) {
+			printf("FAILED\n");
+			errors++;
+		} else
+			printf("OK\n");
+	}
+	return errors;
+}
+
+#endif /* UNITTEST */
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 4/5] erofs-utils: introduce copy_file_range
  2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
                   ` (2 preceding siblings ...)
  2021-09-22 18:56 ` [PATCH v3 3/5] erofs-utils: introduce sha256 Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
  2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
  4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao

Add copy_file_range support. Fall back to a userspace emulation if
libc doesn't provide it or the kernel doesn't implement it (ENOSYS).

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 configure.ac       |  1 +
 include/erofs/io.h |  5 +++
 lib/io.c           | 95 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+)

diff --git a/configure.ac b/configure.ac
index a749db0aed65..9d7d5c22e53f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -158,6 +158,7 @@ AC_CHECK_DECL(lseek64,[AC_DEFINE(HAVE_LSEEK64_PROTOTYPE, 1,
 # Checks for library functions.
 AC_CHECK_FUNCS(m4_flatten([
 	backtrace
+	copy_file_range
 	fallocate
 	gettimeofday
 	lgetxattr
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 0763baf50dc3..2597bf48a1c4 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -7,6 +7,7 @@
 #ifndef __EROFS_IO_H
 #define __EROFS_IO_H
 
+#define _GNU_SOURCE
 #include <unistd.h>
 #include "internal.h"
 
@@ -24,6 +25,10 @@ int dev_fsync(void);
 int dev_resize(erofs_blk_t nblocks);
 u64 dev_length(void);
 
+int erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
+                          int fd_out, erofs_off_t *off_out,
+                          size_t length);
+
 static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
 			    u32 nblocks)
 {
diff --git a/lib/io.c b/lib/io.c
index 620cb9c960e1..504a69e4bdc1 100644
--- a/lib/io.c
+++ b/lib/io.c
@@ -258,3 +258,98 @@ int dev_read(void *buf, u64 offset, size_t len)
 	}
 	return 0;
 }
+
+static int __erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
+				   int fd_out, erofs_off_t *off_out,
+				   size_t length)
+{
+	size_t copied = 0;
+	char buf[8192];
+
+	/*
+	 * Userspace emulation of copy_file_range(): move up to @length bytes
+	 * through an arbitrarily-sized stack buffer, advancing both offsets.
+	 * Returns bytes copied (may be short), or -errno if none were.
+	 */
+	while (length > 0) {
+		size_t to_read;
+		ssize_t read_count;
+		char *end, *p;
+
+		to_read = min_t(size_t, length, sizeof(buf));
+#ifdef HAVE_PREAD64
+		read_count = pread64(fd_in, buf, to_read, *off_in);
+#else
+		read_count = pread(fd_in, buf, to_read, *off_in);
+#endif
+		if (read_count == 0)
+			/* End of file reached prematurely. */
+			return copied;
+		if (read_count < 0) {
+			/* Report the number of bytes copied so far. */
+			if (copied > 0)
+				return copied;
+			return -errno;	/* same convention as the syscall path */
+		}
+		*off_in += read_count;
+
+		/* Write the buffer part which was read to the destination. */
+		end = buf + read_count;
+		for (p = buf; p < end; ) {
+			ssize_t write_count;
+
+#ifdef HAVE_PWRITE64
+			write_count = pwrite64(fd_out, p, end - p, *off_out);
+#else
+			write_count = pwrite(fd_out, p, end - p, *off_out);
+#endif
+			if (write_count < 0) {
+				/*
+				 * Adjust the input read position to match what
+				 * we have written, so that the caller can pick
+				 * up after the error.
+				 */
+				size_t written = p - buf;
+				/*
+				 * NB: This needs to be signed so that we can
+				 * form the negative value below.
+				 */
+				ssize_t overread = read_count - written;
+
+				*off_in -= overread;
+				/* Report the number of bytes copied so far. */
+				if (copied + written > 0)
+					return copied + written;
+				return -errno;	/* nothing copied: pwrite's error */
+			}
+			p += write_count;
+			*off_out += write_count;
+		} /* Write loop.  */
+		copied += read_count;
+		length -= read_count;
+	}
+	return copied;
+}
+
+int erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
+			  int fd_out, erofs_off_t *off_out,
+			  size_t length)
+{
+#ifdef HAVE_COPY_FILE_RANGE
+	off64_t off64_in = *off_in, off64_out = *off_out;
+	ssize_t ret;
+
+	ret = copy_file_range(fd_in, &off64_in, fd_out, &off64_out,
+			      length, 0);
+	if (ret >= 0)
+		goto out;
+	if (errno != ENOSYS && errno != EXDEV) {	/* else emulate below */
+		ret = -errno;
+out:
+		*off_in = off64_in;
+		*off_out = off64_out;
+		return ret;
+	}
+#endif
+	return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length);
+}
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files
  2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
                   ` (3 preceding siblings ...)
  2021-09-22 18:56 ` [PATCH v3 4/5] erofs-utils: introduce copy_file_range Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
  2021-09-22 19:07   ` Gao Xiang
  4 siblings, 1 reply; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
  To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao

mkfs support for the new chunk-based uncompressed files,
including:
 * chunk-based files with 4-byte block address array;
 * chunk-based files with 8-byte inode chunk indexes.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/erofs/blobchunk.h |  18 ++++
 include/erofs/config.h    |   1 +
 include/erofs/defs.h      |  77 ++++++++++++++
 include/erofs/hashtable.h |  77 --------------
 include/erofs/internal.h  |   1 +
 include/erofs/io.h        |   2 +
 lib/Makefile.am           |   2 +-
 lib/blobchunk.c           | 217 ++++++++++++++++++++++++++++++++++++++
 lib/inode.c               |  36 +++++--
 lib/io.c                  |   2 +-
 man/mkfs.erofs.1          |   3 +
 mkfs/main.c               |  38 +++++++
 12 files changed, 389 insertions(+), 85 deletions(-)
 create mode 100644 include/erofs/blobchunk.h
 create mode 100644 lib/blobchunk.c

diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h
new file mode 100644
index 000000000000..b418227e0ef8
--- /dev/null
+++ b/include/erofs/blobchunk.h
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/include/erofs/blobchunk.h
+ *
+ * Copyright (C) 2021, Alibaba Cloud
+ */
+#ifndef __EROFS_BLOBCHUNK_H
+#define __EROFS_BLOBCHUNK_H
+
+#include "erofs/internal.h"
+
+int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off);
+int erofs_blob_write_chunked_file(struct erofs_inode *inode);
+int erofs_blob_remap(void);
+void erofs_blob_exit(void);
+int erofs_blob_init(void);
+
+#endif
diff --git a/include/erofs/config.h b/include/erofs/config.h
index d5d9b5a751c0..574dd52be12d 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -42,6 +42,7 @@ struct erofs_configure {
 	bool c_random_pclusterblks;
 #endif
 	char c_timeinherit;
+	char c_chunkbits;
 	bool c_noinline_data;
 
 #ifdef HAVE_LIBSELINUX
diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index 6e0a7774871c..96bbb6574ff3 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -175,6 +175,83 @@ static inline u32 get_unaligned_le32(const u8 *p)
 	return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
 }
 
+/**
+ * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
+ * @n - parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ *   the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n)			\
+(					\
+	(n) & (1ULL << 63) ? 63 :	\
+	(n) & (1ULL << 62) ? 62 :	\
+	(n) & (1ULL << 61) ? 61 :	\
+	(n) & (1ULL << 60) ? 60 :	\
+	(n) & (1ULL << 59) ? 59 :	\
+	(n) & (1ULL << 58) ? 58 :	\
+	(n) & (1ULL << 57) ? 57 :	\
+	(n) & (1ULL << 56) ? 56 :	\
+	(n) & (1ULL << 55) ? 55 :	\
+	(n) & (1ULL << 54) ? 54 :	\
+	(n) & (1ULL << 53) ? 53 :	\
+	(n) & (1ULL << 52) ? 52 :	\
+	(n) & (1ULL << 51) ? 51 :	\
+	(n) & (1ULL << 50) ? 50 :	\
+	(n) & (1ULL << 49) ? 49 :	\
+	(n) & (1ULL << 48) ? 48 :	\
+	(n) & (1ULL << 47) ? 47 :	\
+	(n) & (1ULL << 46) ? 46 :	\
+	(n) & (1ULL << 45) ? 45 :	\
+	(n) & (1ULL << 44) ? 44 :	\
+	(n) & (1ULL << 43) ? 43 :	\
+	(n) & (1ULL << 42) ? 42 :	\
+	(n) & (1ULL << 41) ? 41 :	\
+	(n) & (1ULL << 40) ? 40 :	\
+	(n) & (1ULL << 39) ? 39 :	\
+	(n) & (1ULL << 38) ? 38 :	\
+	(n) & (1ULL << 37) ? 37 :	\
+	(n) & (1ULL << 36) ? 36 :	\
+	(n) & (1ULL << 35) ? 35 :	\
+	(n) & (1ULL << 34) ? 34 :	\
+	(n) & (1ULL << 33) ? 33 :	\
+	(n) & (1ULL << 32) ? 32 :	\
+	(n) & (1ULL << 31) ? 31 :	\
+	(n) & (1ULL << 30) ? 30 :	\
+	(n) & (1ULL << 29) ? 29 :	\
+	(n) & (1ULL << 28) ? 28 :	\
+	(n) & (1ULL << 27) ? 27 :	\
+	(n) & (1ULL << 26) ? 26 :	\
+	(n) & (1ULL << 25) ? 25 :	\
+	(n) & (1ULL << 24) ? 24 :	\
+	(n) & (1ULL << 23) ? 23 :	\
+	(n) & (1ULL << 22) ? 22 :	\
+	(n) & (1ULL << 21) ? 21 :	\
+	(n) & (1ULL << 20) ? 20 :	\
+	(n) & (1ULL << 19) ? 19 :	\
+	(n) & (1ULL << 18) ? 18 :	\
+	(n) & (1ULL << 17) ? 17 :	\
+	(n) & (1ULL << 16) ? 16 :	\
+	(n) & (1ULL << 15) ? 15 :	\
+	(n) & (1ULL << 14) ? 14 :	\
+	(n) & (1ULL << 13) ? 13 :	\
+	(n) & (1ULL << 12) ? 12 :	\
+	(n) & (1ULL << 11) ? 11 :	\
+	(n) & (1ULL << 10) ? 10 :	\
+	(n) & (1ULL <<  9) ?  9 :	\
+	(n) & (1ULL <<  8) ?  8 :	\
+	(n) & (1ULL <<  7) ?  7 :	\
+	(n) & (1ULL <<  6) ?  6 :	\
+	(n) & (1ULL <<  5) ?  5 :	\
+	(n) & (1ULL <<  4) ?  4 :	\
+	(n) & (1ULL <<  3) ?  3 :	\
+	(n) & (1ULL <<  2) ?  2 :	\
+	(n) & (1ULL <<  1) ?  1 : 0	\
+)
+
 #ifndef __always_inline
 #define __always_inline	inline
 #endif
diff --git a/include/erofs/hashtable.h b/include/erofs/hashtable.h
index a71cb0044816..90eb84ee8598 100644
--- a/include/erofs/hashtable.h
+++ b/include/erofs/hashtable.h
@@ -262,83 +262,6 @@ static __always_inline u32 hash_64(u64 val, unsigned int bits)
 #endif
 }
 
-/**
- * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
- * @n - parameter
- *
- * constant-capable log of base 2 calculation
- * - this can be used to initialise global variables from constant data, hence
- *   the massive ternary operator construction
- *
- * selects the appropriately-sized optimised version depending on sizeof(n)
- */
-#define ilog2(n)				\
-(								\
-	(n) & (1ULL << 63) ? 63 :	\
-	(n) & (1ULL << 62) ? 62 :	\
-	(n) & (1ULL << 61) ? 61 :	\
-	(n) & (1ULL << 60) ? 60 :	\
-	(n) & (1ULL << 59) ? 59 :	\
-	(n) & (1ULL << 58) ? 58 :	\
-	(n) & (1ULL << 57) ? 57 :	\
-	(n) & (1ULL << 56) ? 56 :	\
-	(n) & (1ULL << 55) ? 55 :	\
-	(n) & (1ULL << 54) ? 54 :	\
-	(n) & (1ULL << 53) ? 53 :	\
-	(n) & (1ULL << 52) ? 52 :	\
-	(n) & (1ULL << 51) ? 51 :	\
-	(n) & (1ULL << 50) ? 50 :	\
-	(n) & (1ULL << 49) ? 49 :	\
-	(n) & (1ULL << 48) ? 48 :	\
-	(n) & (1ULL << 47) ? 47 :	\
-	(n) & (1ULL << 46) ? 46 :	\
-	(n) & (1ULL << 45) ? 45 :	\
-	(n) & (1ULL << 44) ? 44 :	\
-	(n) & (1ULL << 43) ? 43 :	\
-	(n) & (1ULL << 42) ? 42 :	\
-	(n) & (1ULL << 41) ? 41 :	\
-	(n) & (1ULL << 40) ? 40 :	\
-	(n) & (1ULL << 39) ? 39 :	\
-	(n) & (1ULL << 38) ? 38 :	\
-	(n) & (1ULL << 37) ? 37 :	\
-	(n) & (1ULL << 36) ? 36 :	\
-	(n) & (1ULL << 35) ? 35 :	\
-	(n) & (1ULL << 34) ? 34 :	\
-	(n) & (1ULL << 33) ? 33 :	\
-	(n) & (1ULL << 32) ? 32 :	\
-	(n) & (1ULL << 31) ? 31 :	\
-	(n) & (1ULL << 30) ? 30 :	\
-	(n) & (1ULL << 29) ? 29 :	\
-	(n) & (1ULL << 28) ? 28 :	\
-	(n) & (1ULL << 27) ? 27 :	\
-	(n) & (1ULL << 26) ? 26 :	\
-	(n) & (1ULL << 25) ? 25 :	\
-	(n) & (1ULL << 24) ? 24 :	\
-	(n) & (1ULL << 23) ? 23 :	\
-	(n) & (1ULL << 22) ? 22 :	\
-	(n) & (1ULL << 21) ? 21 :	\
-	(n) & (1ULL << 20) ? 20 :	\
-	(n) & (1ULL << 19) ? 19 :	\
-	(n) & (1ULL << 18) ? 18 :	\
-	(n) & (1ULL << 17) ? 17 :	\
-	(n) & (1ULL << 16) ? 16 :	\
-	(n) & (1ULL << 15) ? 15 :	\
-	(n) & (1ULL << 14) ? 14 :	\
-	(n) & (1ULL << 13) ? 13 :	\
-	(n) & (1ULL << 12) ? 12 :	\
-	(n) & (1ULL << 11) ? 11 :	\
-	(n) & (1ULL << 10) ? 10 :	\
-	(n) & (1ULL <<  9) ?  9 :	\
-	(n) & (1ULL <<  8) ?  8 :	\
-	(n) & (1ULL <<  7) ?  7 :	\
-	(n) & (1ULL <<  6) ?  6 :	\
-	(n) & (1ULL <<  5) ?  5 :	\
-	(n) & (1ULL <<  4) ?  4 :	\
-	(n) & (1ULL <<  3) ?  3 :	\
-	(n) & (1ULL <<  2) ?  2 :	\
-	(n) & (1ULL <<  1) ?  1 : 0	\
-)
-
 #define DEFINE_HASHTABLE(name, bits)					\
 	struct hlist_head name[1 << (bits)] =				\
 			{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 8621f3426410..8b154edb9f88 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -165,6 +165,7 @@ struct erofs_inode {
 
 	union {
 		void *compressmeta;
+		void *chunkindexes;
 		struct {
 			uint16_t z_advise;
 			uint8_t  z_algorithmtype[2];
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 2597bf48a1c4..2597c5c0eb96 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -25,6 +25,8 @@ int dev_fsync(void);
 int dev_resize(erofs_blk_t nblocks);
 u64 dev_length(void);
 
+extern int erofs_devfd;
+
 int erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
                           int fd_out, erofs_off_t *off_out,
                           size_t length);
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 2638a109c29c..b64d90b3e144 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -22,7 +22,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
 noinst_HEADERS += compressor.h
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
-		      compress_hints.c hashmap.c sha256.c
+		      compress_hints.c hashmap.c sha256.c blobchunk.c
 liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
 if ENABLE_LZ4
 liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
new file mode 100644
index 000000000000..e05d0cb08252
--- /dev/null
+++ b/lib/blobchunk.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/blobchunk.c
+ *
+ * Copyright (C) 2021, Alibaba Cloud
+ */
+#define _GNU_SOURCE
+#include "erofs/hashmap.h"
+#include "erofs/blobchunk.h"
+#include "erofs/cache.h"
+#include "erofs/io.h"
+#include <unistd.h>
+
+void erofs_sha256(const unsigned char *in, unsigned long in_size,
+		  unsigned char out[32]);
+
+struct erofs_blobchunk {
+	struct hashmap_entry ent;
+	char		sha256[32];
+	unsigned int	chunksize;
+	erofs_blk_t	blkaddr;
+};
+
+static struct hashmap blob_hashmap;
+static FILE *blobfile;
+static erofs_blk_t remapped_base;
+
+static struct erofs_blobchunk *erofs_blob_getchunk(int fd,
+		unsigned int chunksize)
+{
+	static u8 zeroed[EROFS_BLKSIZ];
+	u8 *chunkdata, sha256[32];
+	int ret;
+	unsigned int hash;
+	erofs_off_t blkpos;
+	struct erofs_blobchunk *chunk;
+
+	chunkdata = malloc(chunksize);
+	if (!chunkdata)
+		return ERR_PTR(-ENOMEM);
+	/* read one chunk from fd and look it up by the SHA-256 of its data */
+	ret = read(fd, chunkdata, chunksize);
+	if (ret < 0 || (unsigned int)ret < chunksize) {	/* error or short read */
+		chunk = ERR_PTR(-EIO);
+		goto out;
+	}
+	erofs_sha256(chunkdata, chunksize, sha256);
+	hash = memhash(sha256, sizeof(sha256));
+	chunk = hashmap_get_from_hash(&blob_hashmap, hash, sha256);
+	if (chunk) {
+		DBG_BUGON(chunksize != chunk->chunksize);
+		goto out;
+	}
+	chunk = malloc(sizeof(struct erofs_blobchunk));
+	if (!chunk) {
+		chunk = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	/* unseen data: remember its block position in blobfile, then append */
+	chunk->chunksize = chunksize;
+	blkpos = ftell(blobfile);
+	DBG_BUGON(erofs_blkoff(blkpos));
+	chunk->blkaddr = erofs_blknr(blkpos);
+	memcpy(chunk->sha256, sha256, sizeof(sha256));
+	hashmap_entry_init(&chunk->ent, hash);
+	hashmap_add(&blob_hashmap, chunk);
+
+	erofs_dbg("Writing chunk (%u bytes) to %u", chunksize, chunk->blkaddr);
+	ret = fwrite(chunkdata, chunksize, 1, blobfile);
+	if (ret == 1 && erofs_blkoff(chunksize))
+		ret = fwrite(zeroed, EROFS_BLKSIZ - erofs_blkoff(chunksize),
+			     1, blobfile);
+	if (ret < 1) {
+		struct hashmap_entry key;
+
+		hashmap_entry_init(&key, hash);
+		hashmap_remove(&blob_hashmap, &key, sha256);
+		free(chunk);	/* just unlinked from the map: nobody else owns it */
+		chunk = ERR_PTR(-ENOSPC);
+	}
+out:
+	free(chunkdata);
+	return chunk;
+}
+
+static int erofs_blob_hashmap_cmp(const void *a, const void *b,
+				  const void *key)
+{
+	const struct erofs_blobchunk *ec1 =
+			container_of((struct hashmap_entry *)a,
+				     struct erofs_blobchunk, ent);
+	const struct erofs_blobchunk *ec2 =
+			container_of((struct hashmap_entry *)b,
+				     struct erofs_blobchunk, ent);
+
+	return memcmp(ec1->sha256, key ? key : ec2->sha256,
+		      sizeof(ec1->sha256));
+}
+
+int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
+				   erofs_off_t off)
+{
+	struct erofs_inode_chunk_index idx = {0};
+	unsigned int dst, src, unit;
+
+	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+		unit = sizeof(struct erofs_inode_chunk_index);
+	else
+		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+	/* rewrite the chunk-pointer slots in place as on-disk entries */
+	for (dst = src = 0; dst < inode->extent_isize;
+	     src += sizeof(idx), dst += unit) {
+		struct erofs_blobchunk *chunk;
+
+		chunk = *(void **)(inode->chunkindexes + src);
+		/* slots are sizeof(idx) apart (filled via idx++): dst never passes src */
+		idx.blkaddr = chunk->blkaddr + remapped_base;
+		if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE)
+			memcpy(inode->chunkindexes + dst, &idx.blkaddr, unit);
+		else
+			memcpy(inode->chunkindexes + dst, &idx, sizeof(idx));
+	}
+	off = roundup(off, unit);
+
+	return dev_write(inode->chunkindexes, off, inode->extent_isize);
+}
+
+int erofs_blob_write_chunked_file(struct erofs_inode *inode)
+{
+	unsigned int chunksize = 1 << cfg.c_chunkbits;
+	unsigned int count = DIV_ROUND_UP(inode->i_size, chunksize);
+	struct erofs_inode_chunk_index *idx;
+	erofs_off_t pos, len;
+	unsigned int unit;
+	int fd, ret;
+
+	inode->u.chunkformat |= inode->u.chunkbits - LOG_BLOCK_SIZE;
+
+	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+		unit = sizeof(struct erofs_inode_chunk_index);
+	else
+		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+
+	inode->extent_isize = count * unit;
+	idx = malloc(count * max(sizeof(*idx), sizeof(void *)));
+	if (!idx)
+		return -ENOMEM;
+	inode->chunkindexes = idx;
+
+	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
+	if (fd < 0) {
+		ret = -errno;
+		goto err;
+	}
+
+	for (pos = 0; pos < inode->i_size; pos += len) {
+		struct erofs_blobchunk *chunk;
+
+		len = min_t(u64, inode->i_size - pos, chunksize);
+		chunk = erofs_blob_getchunk(fd, len);
+		if (IS_ERR(chunk)) {
+			ret = PTR_ERR(chunk);
+			close(fd);
+			goto err;
+		}
+		*(void **)idx++ = chunk;
+	}
+	inode->datalayout = EROFS_INODE_CHUNK_BASED;
+	close(fd);
+	return 0;
+err:
+	free(inode->chunkindexes);
+	inode->chunkindexes = NULL;
+	return ret;
+}
+
+int erofs_blob_remap(void)
+{
+	struct erofs_buffer_head *bh;
+	ssize_t length;
+	erofs_off_t pos_in, pos_out;
+	int ret;
+
+	if (fflush(blobfile) || (length = ftell(blobfile)) < 0)
+		return -errno;	/* don't size the allocation/copy from -1 */
+	bh = erofs_balloc(DATA, length, 0, 0);
+	if (IS_ERR(bh))
+		return PTR_ERR(bh);
+
+	erofs_mapbh(bh->block);
+	pos_out = erofs_btell(bh, false);
+	pos_in = 0;
+	remapped_base = erofs_blknr(pos_out);	/* added to each chunk->blkaddr */
+	ret = erofs_copy_file_range(fileno(blobfile), &pos_in,
+				    erofs_devfd, &pos_out, length);
+	bh->op = &erofs_skip_write_bhops;
+	erofs_bdrop(bh, false);
+	return ret < length ? -EIO : 0;
+}
+
+void erofs_blob_exit(void)
+{
+	if (blobfile)
+		fclose(blobfile);
+
+	hashmap_free(&blob_hashmap, 1);
+}
+
+int erofs_blob_init(void)
+{
+	blobfile = tmpfile64();
+	if (!blobfile)
+		return -errno;	/* real cause (EACCES, EMFILE, ...), not a fixed ENOMEM */
+
+	hashmap_init(&blob_hashmap, erofs_blob_hashmap_cmp, 0);
+	return 0;
+}
diff --git a/lib/inode.c b/lib/inode.c
index 4c40c348aa4b..26ffa4b2bb38 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -24,6 +24,7 @@
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
+#include "erofs/blobchunk.h"
 
 #define S_SHIFT                 12
 static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -387,6 +388,12 @@ int erofs_write_file(struct erofs_inode *inode)
 		return 0;
 	}
 
+	if (cfg.c_chunkbits) {
+		inode->u.chunkbits = cfg.c_chunkbits;
+		inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES;
+		return erofs_blob_write_chunked_file(inode);
+	}
+
 	if (cfg.c_compr_alg_master && erofs_file_is_compressible(inode)) {
 		ret = erofs_write_compressed_file(inode);
 
@@ -440,6 +447,10 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
 			if (is_inode_layout_compression(inode))
 				u.dic.i_u.compressed_blocks =
 					cpu_to_le32(inode->u.i_blocks);
+			else if (inode->datalayout ==
+					EROFS_INODE_CHUNK_BASED)
+				u.dic.i_u.c.format =
+					cpu_to_le16(inode->u.chunkformat);
 			else
 				u.dic.i_u.raw_blkaddr =
 					cpu_to_le32(inode->u.i_blkaddr);
@@ -473,6 +484,10 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
 			if (is_inode_layout_compression(inode))
 				u.die.i_u.compressed_blocks =
 					cpu_to_le32(inode->u.i_blocks);
+			else if (inode->datalayout ==
+					EROFS_INODE_CHUNK_BASED)
+				u.die.i_u.c.format =
+					cpu_to_le16(inode->u.chunkformat);
 			else
 				u.die.i_u.raw_blkaddr =
 					cpu_to_le32(inode->u.i_blkaddr);
@@ -505,12 +520,19 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
 	}
 
 	if (inode->extent_isize) {
-		/* write compression metadata */
-		off = Z_EROFS_VLE_EXTENT_ALIGN(off);
-		ret = dev_write(inode->compressmeta, off, inode->extent_isize);
-		if (ret)
-			return false;
-		free(inode->compressmeta);
+		if (inode->datalayout == EROFS_INODE_CHUNK_BASED) {
+			ret = erofs_blob_write_chunk_indexes(inode, off);
+			if (ret)
+				return false;
+		} else {
+			/* write compression metadata */
+			off = Z_EROFS_VLE_EXTENT_ALIGN(off);
+			ret = dev_write(inode->compressmeta, off,
+					inode->extent_isize);
+			if (ret)
+				return false;
+			free(inode->compressmeta);
+		}
 	}
 
 	inode->bh = NULL;
@@ -565,6 +587,8 @@ static int erofs_prepare_inode_buffer(struct erofs_inode *inode)
 
 	if (is_inode_layout_compression(inode))
 		goto noinline;
+	if (inode->datalayout == EROFS_INODE_CHUNK_BASED)
+		goto noinline;
 
 	if (cfg.c_noinline_data && S_ISREG(inode->i_mode)) {
 		inode->datalayout = EROFS_INODE_FLAT_PLAIN;
diff --git a/lib/io.c b/lib/io.c
index 504a69e4bdc1..03c7e3355089 100644
--- a/lib/io.c
+++ b/lib/io.c
@@ -24,7 +24,7 @@
 #include "erofs/print.h"
 
 static const char *erofs_devname;
-static int erofs_devfd = -1;
+int erofs_devfd = -1;
 static u64 erofs_devsz;
 
 int dev_get_blkdev_size(int fd, u64 *bytes)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 1446cb56db30..3c250c118168 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -83,6 +83,9 @@ Set all file gids to \fIGID\fR.
 .B \-\-all-root
 Make all files owned by root.
 .TP
+.BI "\-\-chunksize " #
+Generate chunk-based files with #-byte chunks.
+.TP
 .B \-\-help
 Display this help and exit.
 .TP
diff --git a/mkfs/main.c b/mkfs/main.c
index addefcefea38..b61205dac91a 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -22,6 +22,7 @@
 #include "erofs/exclude.h"
 #include "erofs/block_list.h"
 #include "erofs/compress_hints.h"
+#include "erofs/blobchunk.h"
 
 #ifdef HAVE_LIBUUID
 #include <uuid.h>
@@ -44,6 +45,7 @@ static struct option long_options[] = {
 #endif
 	{"max-extent-bytes", required_argument, NULL, 9},
 	{"compress-hints", required_argument, NULL, 10},
+	{"chunksize", required_argument, NULL, 11},
 #ifdef WITH_ANDROID
 	{"mount-point", required_argument, NULL, 512},
 	{"product-out", required_argument, NULL, 513},
@@ -79,6 +81,7 @@ static void usage(void)
 #ifdef HAVE_LIBUUID
 	      " -UX                   use a given filesystem UUID\n"
 #endif
+	      " --chunksize=X         generate chunk-based files with X-byte chunks\n"
 	      " --exclude-path=X      avoid including file X (X = exact literal path)\n"
 	      " --exclude-regex=X     avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -321,6 +324,26 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
 			cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
 			cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
 			break;
+		case 11:
+			i = strtol(optarg, &endptr, 0);
+			if (*endptr != '\0') {
+				erofs_err("invalid chunksize %s", optarg);
+				return -EINVAL;
+			}
+			cfg.c_chunkbits = ilog2(i);
+			if ((1 << cfg.c_chunkbits) != i) {
+				erofs_err("chunksize %s must be a power of two",
+					  optarg);
+				return -EINVAL;
+			}
+			if (i < EROFS_BLKSIZ) {
+				erofs_err("chunksize %s must be larger than block size",
+					  optarg);
+				return -EINVAL;
+			}
+			erofs_sb_set_chunked_file();
+			erofs_warn("EXPERIMENTAL chunked file feature in use. Use at your own risk!");
+			break;
 
 		case 1:
 			usage();
@@ -528,6 +551,12 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	if (cfg.c_chunkbits) {
+		err = erofs_blob_init();
+		if (err)
+			return 1;
+	}
+
 	err = lstat64(cfg.c_src_path, &st);
 	if (err)
 		return 1;
@@ -622,6 +651,13 @@ int main(int argc, char **argv)
 	root_nid = erofs_lookupnid(root_inode);
 	erofs_iput(root_inode);
 
+	if (cfg.c_chunkbits) {
+		erofs_info("total metadata: %u blocks", erofs_mapbh(NULL));
+		err = erofs_blob_remap();
+		if (err)
+			goto exit;
+	}
+
 	err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks);
 	if (err)
 		goto exit;
@@ -642,6 +678,8 @@ exit:
 	dev_close();
 	erofs_cleanup_compress_hints();
 	erofs_cleanup_exclude_rules();
+	if (cfg.c_chunkbits)
+		erofs_blob_exit();
 	erofs_exit_configure();
 
 	if (err) {
-- 
2.24.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files
  2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
@ 2021-09-22 19:07   ` Gao Xiang
  0 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 19:07 UTC (permalink / raw)
  To: linux-erofs; +Cc: Liu Jiang, Liu Bo, Peng Tao

On Thu, Sep 23, 2021 at 02:56:07AM +0800, Gao Xiang wrote:
> mkfs support for the new chunk-based uncompressed files,
> including:
>  * chunk-based files with 4-byte block address array;
>  * chunk-based files with 8-byte inode chunk indexes.
> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
>  include/erofs/blobchunk.h |  18 ++++
>  include/erofs/config.h    |   1 +
>  include/erofs/defs.h      |  77 ++++++++++++++
>  include/erofs/hashtable.h |  77 --------------
>  include/erofs/internal.h  |   1 +
>  include/erofs/io.h        |   2 +
>  lib/Makefile.am           |   2 +-
>  lib/blobchunk.c           | 217 ++++++++++++++++++++++++++++++++++++++
>  lib/inode.c               |  36 +++++--
>  lib/io.c                  |   2 +-
>  man/mkfs.erofs.1          |   3 +
>  mkfs/main.c               |  38 +++++++
>  12 files changed, 389 insertions(+), 85 deletions(-)
>  create mode 100644 include/erofs/blobchunk.h
>  create mode 100644 lib/blobchunk.c
>

Applying the following diff to fix up the macOS build:

diff --git a/configure.ac b/configure.ac
index 9d7d5c2..03387f5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -170,7 +170,8 @@ AC_CHECK_FUNCS(m4_flatten([
 	strdup
 	strerror
 	strrchr
-	strtoull]))
+	strtoull
+	tmpfile64]))
 
 # Configure debug mode
 AS_IF([test "x$enable_debug" != "xno"], [], [
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index e05d0cb..725b517 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -208,7 +208,11 @@ void erofs_blob_exit(void)
 
 int erofs_blob_init(void)
 {
+#ifdef HAVE_TMPFILE64
 	blobfile = tmpfile64();
+#else
+	blobfile = tmpfile();
+#endif
 	if (!blobfile)
 		return -ENOMEM;
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-09-22 19:07 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
2021-09-22 18:56 ` [PATCH v3 2/5] erofs-utils: introduce hashmap from git source Gao Xiang
2021-09-22 18:56 ` [PATCH v3 3/5] erofs-utils: introduce sha256 Gao Xiang
2021-09-22 18:56 ` [PATCH v3 4/5] erofs-utils: introduce copy_file_range Gao Xiang
2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
2021-09-22 19:07   ` Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).