* [PATCH v3 0/5] erofs-utils: add support for chunk-based files
@ 2021-09-22 18:56 Gao Xiang
2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
` (4 more replies)
0 siblings, 5 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao
v1 & 2: https://lore.kernel.org/r/20210818070316.1970-2-hsiangkao@linux.alibaba.com
changes since v2:
- add erofsfuse support for chunk-based files;
- add support for 4-byte blockmap array in addition to chunk indexes;
- update manpages;
- minor cleanups.
Gao Xiang (5):
erofs-utils: fuse: support reading chunk-based uncompressed files
erofs-utils: introduce hashmap from git source
erofs-utils: introduce sha256
erofs-utils: introduce copy_file_range
erofs-utils: mkfs: support chunk-based uncompressed files
configure.ac | 1 +
include/erofs/blobchunk.h | 18 +++
include/erofs/config.h | 1 +
include/erofs/defs.h | 77 ++++++++++
include/erofs/flex-array.h | 147 +++++++++++++++++++
include/erofs/hashmap.h | 103 ++++++++++++++
include/erofs/hashtable.h | 77 ----------
include/erofs/internal.h | 6 +
include/erofs/io.h | 7 +
include/erofs_fs.h | 48 ++++++-
lib/Makefile.am | 3 +-
lib/blobchunk.c | 217 ++++++++++++++++++++++++++++
lib/data.c | 86 +++++++++--
lib/hashmap.c | 284 +++++++++++++++++++++++++++++++++++++
lib/inode.c | 36 ++++-
lib/io.c | 97 ++++++++++++-
lib/namei.c | 15 +-
lib/sha256.c | 248 ++++++++++++++++++++++++++++++++
man/mkfs.erofs.1 | 3 +
mkfs/main.c | 38 +++++
20 files changed, 1413 insertions(+), 99 deletions(-)
create mode 100644 include/erofs/blobchunk.h
create mode 100644 include/erofs/flex-array.h
create mode 100644 include/erofs/hashmap.h
create mode 100644 lib/blobchunk.c
create mode 100644 lib/hashmap.c
create mode 100644 lib/sha256.c
--
2.24.4
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files
2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
2021-09-22 18:56 ` [PATCH v3 2/5] erofs-utils: introduce hashmap from git source Gao Xiang
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao
Keep in sync with the latest kernel
commit 2a9dc7a8fec6 ("erofs: introduce chunk-based file on-disk format")
and
commit c5aa903a59db ("erofs: support reading chunk-based uncompressed files")
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
include/erofs/internal.h | 5 +++
include/erofs_fs.h | 48 ++++++++++++++++++++--
lib/data.c | 86 +++++++++++++++++++++++++++++++++++-----
lib/namei.c | 15 ++++++-
4 files changed, 140 insertions(+), 14 deletions(-)
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index f5eacea5d4d7..8621f3426410 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -109,6 +109,7 @@ static inline void erofs_sb_clear_##name(void) \
EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
+EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
#define EROFS_I_EA_INITED (1 << 0)
@@ -140,6 +141,10 @@ struct erofs_inode {
u32 i_blkaddr;
u32 i_blocks;
u32 i_rdev;
+ struct {
+ unsigned short chunkformat;
+ unsigned char chunkbits;
+ };
} u;
char i_srcpath[PATH_MAX + 1];
diff --git a/include/erofs_fs.h b/include/erofs_fs.h
index 48934bb76cec..66a68e3b2065 100644
--- a/include/erofs_fs.h
+++ b/include/erofs_fs.h
@@ -4,7 +4,7 @@
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
+ * Copyright (C) 2021, Alibaba Cloud
*/
#ifndef __EROFS_FS_H
#define __EROFS_FS_H
@@ -21,10 +21,12 @@
#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
+#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
- EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
+ EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
+ EROFS_FEATURE_INCOMPAT_CHUNKED_FILE)
#define EROFS_SB_EXTSLOT_SIZE 16
@@ -66,13 +68,16 @@ struct erofs_super_block {
* inode, [xattrs], last_inline_data, ... | ... | no-holed data
* 3 - inode compression D:
* inode, [xattrs], map_header, extents ... | ...
- * 4~7 - reserved
+ * 4 - inode chunk-based E:
+ * inode, [xattrs], chunk indexes ... | ...
+ * 5~7 - reserved
*/
enum {
EROFS_INODE_FLAT_PLAIN = 0,
EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1,
EROFS_INODE_FLAT_INLINE = 2,
EROFS_INODE_FLAT_COMPRESSION = 3,
+ EROFS_INODE_CHUNK_BASED = 4,
EROFS_INODE_DATALAYOUT_MAX
};
@@ -92,6 +97,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
#define EROFS_I_ALL \
((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
+/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */
+#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F
+/* with chunk indexes or just a 4-byte blkaddr array */
+#define EROFS_CHUNK_FORMAT_INDEXES 0x0020
+
+#define EROFS_CHUNK_FORMAT_ALL \
+ (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
+
+struct erofs_inode_chunk_info { /* 4-byte chunk summary stored in the inode's i_u union */
+ __le16 format; /* EROFS_CHUNK_FORMAT_* bits: chunk blkbits plus the indexes flag */
+ __le16 reserved; /* not read by the code in this patch */
+};
+
/* 32-byte reduced form of an ondisk inode */
struct erofs_inode_compact {
__le16 i_format; /* inode format hints */
@@ -109,6 +127,9 @@ struct erofs_inode_compact {
/* for device files, used to indicate old/new device # */
__le32 rdev;
+
+ /* for chunk-based files, it contains the summary info */
+ struct erofs_inode_chunk_info c;
} i_u;
__le32 i_ino; /* only used for 32-bit stat compatibility */
__le16 i_uid;
@@ -137,6 +158,9 @@ struct erofs_inode_extended {
/* for device files, used to indicate old/new device # */
__le32 rdev;
+
+ /* for chunk-based files, it contains the summary info */
+ struct erofs_inode_chunk_info c;
} i_u;
/* only used for 32-bit stat compatibility */
@@ -206,6 +230,19 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
e->e_name_len + le16_to_cpu(e->e_value_size));
}
+/* represent a zeroed chunk (hole) */
+#define EROFS_NULL_ADDR -1
+
+/* 4-byte block address array */
+#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32)
+
+/* 8-byte inode chunk index, used when EROFS_CHUNK_FORMAT_INDEXES is set */
+struct erofs_inode_chunk_index {
+ __le16 advise; /* always 0, don't care for now */
+ __le16 device_id; /* back-end storage id, always 0 for now */
+ __le32 blkaddr; /* start block address of this inode chunk; EROFS_NULL_ADDR marks a hole */
+};
+
/* maximum supported size of a physical compression cluster */
#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
@@ -350,9 +387,14 @@ static inline void erofs_check_ondisk_layout_definitions(void)
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
+ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
+ /* keep the 2 index structures the same size for better extensibility */
+ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
+ sizeof(struct z_erofs_vle_decompressed_index));
BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
diff --git a/lib/data.c b/lib/data.c
index 1a1005a67350..641d8408b54f 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -25,13 +25,6 @@ static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
lastblk = nblocks - tailendpacking;
- if (offset >= inode->i_size) {
- /* leave out-of-bound access unmapped */
- map->m_flags = 0;
- map->m_plen = 0;
- goto out;
- }
-
/* there is no hole in flatmode */
map->m_flags = EROFS_MAP_MAPPED;
@@ -62,14 +55,86 @@ static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
goto err_out;
}
-out:
map->m_llen = map->m_plen;
-
err_out:
trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
return err;
}
+static int erofs_map_blocks(struct erofs_inode *inode,
+ struct erofs_map_blocks *map, int flags)
+{ /* map logical offset map->m_la to an extent in *map; returns 0 or a negative errno */
+ struct erofs_inode *vi = inode;
+ struct erofs_inode_chunk_index *idx;
+ u8 buf[EROFS_BLKSIZ]; /* receives the block that holds the chunk entry */
+ u64 chunknr; /* index of the chunk covering map->m_la */
+ unsigned int unit; /* size in bytes of one on-disk chunk entry */
+ erofs_off_t pos; /* byte position of that chunk entry */
+ int err = 0;
+
+ if (map->m_la >= inode->i_size) {
+ /* leave out-of-bound access unmapped */
+ map->m_flags = 0;
+ map->m_plen = 0;
+ goto out;
+ }
+
+ if (vi->datalayout != EROFS_INODE_CHUNK_BASED)
+ return erofs_map_blocks_flatmode(inode, map, flags); /* non-chunked layouts keep the old path */
+
+ if (vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+ unit = sizeof(*idx); /* chunk index */
+ else
+ unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */
+
+ chunknr = map->m_la >> vi->u.chunkbits;
+ pos = roundup(iloc(vi->nid) + vi->inode_isize +
+ vi->xattr_isize, unit) + unit * chunknr; /* entries follow the inode and its xattrs, aligned to unit */
+
+ err = blk_read(buf, erofs_blknr(pos), 1);
+ if (err < 0)
+ return -EIO;
+
+ map->m_la = chunknr << vi->u.chunkbits; /* round the logical offset down to the chunk start */
+ map->m_plen = min_t(erofs_off_t, 1ULL << vi->u.chunkbits, /* ULL: chunkbits is LOG_BLOCK_SIZE + up to 31, too wide for a 32-bit long */
+ roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
+
+ /* handle block map */
+ if (!(vi->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
+ __le32 *blkaddr = (void *)buf + erofs_blkoff(pos); /* entry inside the block just read */
+
+ if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
+ map->m_flags = 0; /* hole: leave the chunk unmapped */
+ } else {
+ map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
+ map->m_flags = EROFS_MAP_MAPPED;
+ }
+ goto out;
+ }
+ /* parse chunk indexes */
+ idx = (void *)buf + erofs_blkoff(pos);
+ switch (le32_to_cpu(idx->blkaddr)) {
+ case EROFS_NULL_ADDR:
+ map->m_flags = 0; /* hole: leave the chunk unmapped */
+ break;
+ default:
+ /* only one device is supported for now */
+ if (idx->device_id) {
+ erofs_err("invalid device id %u @ %" PRIu64 " for nid %llu",
+ le16_to_cpu(idx->device_id),
+ chunknr, vi->nid | 0ULL);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+ map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
+ map->m_flags = EROFS_MAP_MAPPED;
+ break;
+ }
+out:
+ map->m_llen = map->m_plen; /* logical length is set equal to the physical length */
+ return err;
+}
+
static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
erofs_off_t size, erofs_off_t offset)
{
@@ -84,7 +149,7 @@ static int erofs_read_raw_data(struct erofs_inode *inode, char *buffer,
erofs_off_t eend;
map.m_la = ptr;
- ret = erofs_map_blocks_flatmode(inode, &map, 0);
+ ret = erofs_map_blocks(inode, &map, 0);
if (ret)
return ret;
@@ -206,6 +271,7 @@ int erofs_pread(struct erofs_inode *inode, char *buf,
switch (inode->datalayout) {
case EROFS_INODE_FLAT_PLAIN:
case EROFS_INODE_FLAT_INLINE:
+ case EROFS_INODE_CHUNK_BASED:
return erofs_read_raw_data(inode, buf, count, offset);
case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
case EROFS_INODE_FLAT_COMPRESSION:
diff --git a/lib/namei.c b/lib/namei.c
index f96e400c36b0..b4bdabf10acb 100644
--- a/lib/namei.c
+++ b/lib/namei.c
@@ -82,6 +82,9 @@ static int erofs_read_inode_from_disk(struct erofs_inode *vi)
vi->i_ctime = le64_to_cpu(die->i_ctime);
vi->i_ctime_nsec = le64_to_cpu(die->i_ctime_nsec);
vi->i_size = le64_to_cpu(die->i_size);
+ if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+ /* fill chunked inode summary info */
+ vi->u.chunkformat = le16_to_cpu(die->i_u.c.format);
break;
case EROFS_INODE_LAYOUT_COMPACT:
vi->inode_isize = sizeof(struct erofs_inode_compact);
@@ -115,6 +118,8 @@ static int erofs_read_inode_from_disk(struct erofs_inode *vi)
vi->i_ctime_nsec = sbi.build_time_nsec;
vi->i_size = le32_to_cpu(dic->i_size);
+ if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
+ vi->u.chunkformat = le16_to_cpu(dic->i_u.c.format);
break;
default:
erofs_err("unsupported on-disk inode version %u of nid %llu",
@@ -123,7 +128,15 @@ static int erofs_read_inode_from_disk(struct erofs_inode *vi)
}
vi->flags = 0;
- if (erofs_inode_is_data_compressed(vi->datalayout))
+ if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+ if (vi->u.chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
+ erofs_err("unsupported chunk format %x of nid %llu",
+ vi->u.chunkformat, vi->nid | 0ULL);
+ return -EOPNOTSUPP;
+ }
+ vi->u.chunkbits = LOG_BLOCK_SIZE +
+ (vi->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
+ } else if (erofs_inode_is_data_compressed(vi->datalayout))
z_erofs_fill_inode(vi);
return 0;
bogusimode:
--
2.24.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 2/5] erofs-utils: introduce hashmap from git source
2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
2021-09-22 18:56 ` [PATCH v3 3/5] erofs-utils: introduce sha256 Gao Xiang
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao
Copied from the git source tree (https://github.com/git/git.git); it already works as-is.
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
include/erofs/flex-array.h | 147 +++++++++++++++++++
include/erofs/hashmap.h | 103 ++++++++++++++
lib/Makefile.am | 3 +-
lib/hashmap.c | 284 +++++++++++++++++++++++++++++++++++++
4 files changed, 536 insertions(+), 1 deletion(-)
create mode 100644 include/erofs/flex-array.h
create mode 100644 include/erofs/hashmap.h
create mode 100644 lib/hashmap.c
diff --git a/include/erofs/flex-array.h b/include/erofs/flex-array.h
new file mode 100644
index 000000000000..59168d05ee5a
--- /dev/null
+++ b/include/erofs/flex-array.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EROFS_FLEX_ARRAY_H
+#define __EROFS_FLEX_ARRAY_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "defs.h"
+#include "print.h"
+
+/*
+ * flex-array.h
+ *
+ * Some notes to make sense of the code.
+ *
+ * Flex-arrays:
+ * - Flex-arrays became standard in C99 and are defined by "array[]" (at the
+ * end of a struct)
+ * - Pre-C99 flex-arrays can be accomplished by "array[1]"
+ * - There is a GNU extension where they are defined using "array[0]"
+ * Allegedly there is/was a bug in gcc whereby foo[1] generated incorrect
+ * code, so it's safest to use [0] (https://lkml.org/lkml/2015/2/18/407).
+ *
+ * For C89 and C90, __STDC__ is 1
+ * For later standards, __STDC_VERSION__ is defined according to the standard.
+ * For example: 199901L or 201112L
+ *
+ * Whilst we're on the subject, in version 5 of gcc, the default std was
+ * changed from gnu89 to gnu11. In jgmenu, CFLAGS therefore contains -std=gnu89
+ * You can check your default gcc std by doing:
+ * gcc -dM -E - </dev/null | grep '__STDC_VERSION__\|__STDC__'
+ *
+ * The code below is copied from git's git-compat-util.h in support of
+ * hashmap.c
+ */
+
+#ifndef FLEX_ARRAY
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+ (!defined(__SUNPRO_C) || (__SUNPRO_C > 0x580))
+# define FLEX_ARRAY /* empty: C99 flexible array member, i.e. "array[]" */
+#elif defined(__GNUC__)
+# if (__GNUC__ >= 3)
+# define FLEX_ARRAY /* empty: "array[]" as well */
+# else
+# define FLEX_ARRAY 0 /* older GNU extension, i.e. "array[0]" */
+# endif
+#endif
+
+/* Otherwise, default to safer but a bit wasteful traditional style, i.e. "array[1]" */
+#ifndef FLEX_ARRAY
+# define FLEX_ARRAY 1
+#endif
+#endif
+
+#define bitsizeof(x) (CHAR_BIT * sizeof(x)) /* size of x in bits */
+
+#define maximum_signed_value_of_type(a) \
+ (INTMAX_MAX >> (bitsizeof(intmax_t) - bitsizeof(a))) /* INTMAX_MAX shifted down to the width of a */
+
+#define maximum_unsigned_value_of_type(a) \
+ (UINTMAX_MAX >> (bitsizeof(uintmax_t) - bitsizeof(a))) /* UINTMAX_MAX shifted down to the width of a */
+
+/*
+ * Signed integer overflow is undefined in C, so here's a helper macro
+ * to detect if the sum of two integers will overflow.
+ * Requires: a >= 0, typeof(a) equals typeof(b)
+ */
+#define signed_add_overflows(a, b) \
+ ((b) > maximum_signed_value_of_type(a) - (a)) /* note: a is evaluated more than once */
+
+#define unsigned_add_overflows(a, b) \
+ ((b) > maximum_unsigned_value_of_type(a) - (a)) /* true when a + b would exceed the maximum for a's type */
+
+static inline size_t st_add(size_t a, size_t b)
+{ /* return a + b; an overflowing sum is logged and reported through BUG_ON() */
+ if (unsigned_add_overflows(a, b)) {
+ erofs_err("size_t overflow: %llu + %llu", a | 0ULL, b | 0ULL); /* "| 0ULL" widens the operands to match %llu */
+ BUG_ON(1);
+ return -1; /* only reached if BUG_ON() returns; converts to the largest size_t */
+ }
+ return a + b;
+}
+
+#define st_add3(a, b, c) st_add(st_add((a), (b)), (c))
+#define st_add4(a, b, c, d) st_add(st_add3((a), (b), (c)), (d))
+
+/*
+ * These functions help you allocate structs with flex arrays, and copy
+ * the data directly into the array. For example, if you had:
+ *
+ * struct foo {
+ * int bar;
+ * char name[FLEX_ARRAY];
+ * };
+ *
+ * you can do:
+ *
+ * struct foo *f;
+ * FLEX_ALLOC_MEM(f, name, src, len);
+ *
+ * to allocate a "foo" with the contents of "src" in the "name" field.
+ * The resulting struct is automatically zero'd, and the flex-array field
+ * is NUL-terminated (whether the incoming src buffer was or not).
+ *
+ * The FLEXPTR_* variants operate on structs that don't use flex-arrays,
+ * but do want to store a pointer to some extra data in the same allocated
+ * block. For example, if you have:
+ *
+ * struct foo {
+ * char *name;
+ * int bar;
+ * };
+ *
+ * you can do:
+ *
+ * struct foo *f;
+ * FLEXPTR_ALLOC_STR(f, name, src);
+ *
+ * and "name" will point to a block of memory after the struct, which will be
+ * freed along with the struct (but the pointer can be repointed anywhere).
+ *
+ * The *_STR variants accept a string parameter rather than a ptr/len
+ * combination.
+ *
+ * Note that these macros will evaluate the first parameter multiple
+ * times, and it must be assignable as an lvalue.
+ */
+#define FLEX_ALLOC_MEM(x, flexname, buf, len) do { \
+ size_t flex_array_len_ = (len); \
+ (x) = calloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); /* zeroed; the extra byte keeps the copy NUL-terminated */ \
+ BUG_ON(!(x)); \
+ memcpy((void *)(x)->flexname, (buf), flex_array_len_); \
+} while (0)
+#define FLEXPTR_ALLOC_MEM(x, ptrname, buf, len) do { \
+ size_t flex_array_len_ = (len); \
+ (x) = calloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); /* same calloc + BUG_ON pattern as FLEX_ALLOC_MEM, replacing git's xcalloc() */ \
+ BUG_ON(!(x)); \
+ (x)->ptrname = memcpy((x) + 1, (buf), flex_array_len_); /* memcpy() returns its destination, the bytes right after the struct */ \
+} while (0)
+#define FLEX_ALLOC_STR(x, flexname, str) \
+ FLEX_ALLOC_MEM((x), flexname, (str), strlen(str))
+#define FLEXPTR_ALLOC_STR(x, ptrname, str) \
+ FLEXPTR_ALLOC_MEM((x), ptrname, (str), strlen(str))
+
+#endif
diff --git a/include/erofs/hashmap.h b/include/erofs/hashmap.h
new file mode 100644
index 000000000000..024a14e497d4
--- /dev/null
+++ b/include/erofs/hashmap.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EROFS_HASHMAP_H
+#define __EROFS_HASHMAP_H
+
+/* Copied from https://github.com/git/git.git */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "flex-array.h"
+
+/*
+ * Generic implementation of hash-based key-value mappings.
+ * See Documentation/technical/api-hashmap.txt in the git source tree.
+ */
+
+/* FNV-1 functions */
+unsigned int strhash(const char *str);
+unsigned int strihash(const char *str);
+unsigned int memhash(const void *buf, size_t len);
+unsigned int memihash(const void *buf, size_t len);
+
+static inline unsigned int sha1hash(const unsigned char *sha1)
+{ /* use the first sizeof(unsigned int) bytes of the digest as the hash value */
+ /*
+ * Equivalent to 'return *(unsigned int *)sha1;', but safe on
+ * platforms that don't support unaligned reads.
+ */
+ unsigned int hash;
+
+ memcpy(&hash, sha1, sizeof(hash));
+ return hash;
+}
+
+/* data structures */
+struct hashmap_entry { /* header of every stored entry; the map functions cast entry pointers to this type */
+ struct hashmap_entry *next; /* next entry in the same bucket chain, NULL at the end */
+ unsigned int hash; /* full hash value; the bucket index is derived from it */
+};
+
+typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key,
+ const void *keydata); /* returns 0 when the two entries match */
+
+struct hashmap {
+ struct hashmap_entry **table; /* array of tablesize bucket chain heads */
+ hashmap_cmp_fn cmpfn; /* comparison callback, never NULL after hashmap_init() */
+ unsigned int size, tablesize, grow_at, shrink_at; /* entry count, bucket count, resize thresholds */
+};
+
+struct hashmap_iter {
+ struct hashmap *map; /* map being walked */
+ struct hashmap_entry *next; /* entry to return next from the current chain, or NULL */
+ unsigned int tablepos; /* index of the next bucket to visit */
+};
+
+/* hashmap functions */
+void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
+ size_t initial_size);
+void hashmap_free(struct hashmap *map, int free_entries);
+
+/* hashmap_entry functions */
+static inline void hashmap_entry_init(void *entry, unsigned int hash)
+{ /* store hash in the entry header and detach it from any chain */
+ struct hashmap_entry *e = entry; /* entry is treated as a struct hashmap_entry */
+
+ e->hash = hash;
+ e->next = NULL;
+}
+
+void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata);
+void *hashmap_get_next(const struct hashmap *map, const void *entry);
+void hashmap_add(struct hashmap *map, void *entry);
+void *hashmap_put(struct hashmap *map, void *entry);
+void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata);
+
+static inline void *hashmap_get_from_hash(const struct hashmap *map,
+ unsigned int hash,
+ const void *keydata)
+{ /* look up by hash + keydata without building a full entry; returns the match or NULL */
+ struct hashmap_entry key; /* temporary key that carries only the hash */
+
+ hashmap_entry_init(&key, hash);
+ return hashmap_get(map, &key, keydata);
+}
+
+/* hashmap_iter functions */
+void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter);
+void *hashmap_iter_next(struct hashmap_iter *iter);
+static inline void *hashmap_iter_first(struct hashmap *map,
+ struct hashmap_iter *iter)
+{ /* reset iter to the start of map and return the first entry, or NULL if there is none */
+ hashmap_iter_init(map, iter);
+ return hashmap_iter_next(iter);
+}
+
+/* string interning */
+const void *memintern(const void *data, size_t len);
+static inline const char *strintern(const char *string)
+{ /* intern a NUL-terminated string via memintern(), passing strlen(string) as the length */
+ return memintern(string, strlen(string));
+}
+
+#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 5a33e297c194..7d00bf5fafdc 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -21,7 +21,8 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
- namei.c data.c compress.c compressor.c zmap.c decompress.c compress_hints.c
+ namei.c data.c compress.c compressor.c zmap.c decompress.c \
+ compress_hints.c hashmap.c
liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
if ENABLE_LZ4
liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/hashmap.c b/lib/hashmap.c
new file mode 100644
index 000000000000..e11bd8da94c1
--- /dev/null
+++ b/lib/hashmap.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from https://github.com/git/git.git
+ * Generic implementation of hash-based key value mappings.
+ */
+#include "erofs/hashmap.h"
+
+#define FNV32_BASE ((unsigned int)0x811c9dc5)
+#define FNV32_PRIME ((unsigned int)0x01000193)
+
+unsigned int strhash(const char *str)
+{ /* hash a NUL-terminated string: per byte, multiply by FNV32_PRIME then XOR the byte in */
+ unsigned int c, hash = FNV32_BASE;
+
+ while ((c = (unsigned char)*str++)) /* stops at the terminating NUL, which is not hashed */
+ hash = (hash * FNV32_PRIME) ^ c;
+ return hash;
+}
+
+unsigned int strihash(const char *str)
+{ /* like strhash(), but 'a'..'z' are folded to upper case before hashing */
+ unsigned int c, hash = FNV32_BASE;
+
+ while ((c = (unsigned char)*str++)) {
+ if (c >= 'a' && c <= 'z')
+ c -= 'a' - 'A'; /* map a lower-case letter to its upper-case counterpart */
+ hash = (hash * FNV32_PRIME) ^ c;
+ }
+ return hash;
+}
+
+unsigned int memhash(const void *buf, size_t len)
+{ /* hash exactly len bytes of buf; zero bytes are hashed like any other */
+ unsigned int hash = FNV32_BASE;
+ unsigned char *ucbuf = (unsigned char *)buf;
+
+ while (len--) {
+ unsigned int c = *ucbuf++;
+
+ hash = (hash * FNV32_PRIME) ^ c;
+ }
+ return hash; /* FNV32_BASE when len is 0 */
+}
+
+unsigned int memihash(const void *buf, size_t len)
+{ /* like memhash(), but 'a'..'z' are folded to upper case before hashing */
+ unsigned int hash = FNV32_BASE;
+ unsigned char *ucbuf = (unsigned char *)buf;
+
+ while (len--) {
+ unsigned int c = *ucbuf++;
+
+ if (c >= 'a' && c <= 'z')
+ c -= 'a' - 'A'; /* map a lower-case letter to its upper-case counterpart */
+ hash = (hash * FNV32_PRIME) ^ c;
+ }
+ return hash;
+}
+
+#define HASHMAP_INITIAL_SIZE 64
+/* grow / shrink by 2^2 */
+#define HASHMAP_RESIZE_BITS 2
+/* load factor in percent */
+#define HASHMAP_LOAD_FACTOR 80
+
+static void alloc_table(struct hashmap *map, unsigned int size)
+{ /* install a fresh bucket array of size entries and recompute the resize thresholds */
+ map->tablesize = size;
+ map->table = calloc(size, sizeof(struct hashmap_entry *)); /* calloc: every bucket starts out NULL */
+ BUG_ON(!map->table);
+
+ /* calculate resize thresholds for new size */
+ map->grow_at = (unsigned int)((uint64_t)size * HASHMAP_LOAD_FACTOR / 100); /* 64-bit intermediate for the multiplication */
+ if (size <= HASHMAP_INITIAL_SIZE)
+ map->shrink_at = 0; /* 0 disables shrinking: map->size is never below 0 */
+ else
+ /*
+ * The shrink-threshold must be slightly smaller than
+ * (grow-threshold / resize-factor) to prevent erratic resizing,
+ * thus we divide by (resize-factor + 1).
+ */
+ map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1);
+}
+
+static inline int entry_equals(const struct hashmap *map,
+ const struct hashmap_entry *e1,
+ const struct hashmap_entry *e2,
+ const void *keydata)
+{ /* non-zero when e1 and e2 are the same object, or share a hash and cmpfn returns 0 for them */
+ return (e1 == e2) || (e1->hash == e2->hash && !map->cmpfn(e1, e2, keydata));
+}
+
+static inline unsigned int bucket(const struct hashmap *map,
+ const struct hashmap_entry *key)
+{ /* bucket index for key; masking relies on tablesize being a power of two (64 shifted by 2 bits at a time) */
+ return key->hash & (map->tablesize - 1);
+}
+
+static void rehash(struct hashmap *map, unsigned int newsize)
+{ /* move every entry into a new table of newsize buckets, then free the old bucket array */
+ unsigned int i, oldsize = map->tablesize;
+ struct hashmap_entry **oldtable = map->table; /* saved because alloc_table() overwrites map->table */
+
+ alloc_table(map, newsize);
+ for (i = 0; i < oldsize; i++) {
+ struct hashmap_entry *e = oldtable[i];
+
+ while (e) {
+ struct hashmap_entry *next = e->next; /* read before e->next is rewritten below */
+ unsigned int b = bucket(map, e);
+
+ e->next = map->table[b]; /* push e onto the head of its new bucket */
+ map->table[b] = e;
+ e = next;
+ }
+ }
+ free(oldtable); /* only the array is freed; the entries were relinked */
+}
+
+static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map,
+ const struct hashmap_entry *key,
+ const void *keydata)
+{ /* returns the link that points at the matching entry, or the chain's NULL link if none matches */
+ struct hashmap_entry **e = &map->table[bucket(map, key)];
+
+ while (*e && !entry_equals(map, *e, key, keydata))
+ e = &(*e)->next; /* advance to the next link in the chain */
+ return e;
+}
+
+static int always_equal(const void *unused1, const void *unused2, const void *unused3)
+{ /* default cmpfn: reports a match (0) for any pair, so entries are told apart by hash only */
+ return 0;
+}
+
+void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
+ size_t initial_size)
+{ /* set up an empty map sized for about initial_size entries; a NULL equals_function selects always_equal */
+ unsigned int size = HASHMAP_INITIAL_SIZE;
+
+ map->size = 0;
+ map->cmpfn = equals_function ? equals_function : always_equal;
+
+ /* calculate initial table size and allocate the table */
+ initial_size = (unsigned int)((uint64_t)initial_size * 100
+ / HASHMAP_LOAD_FACTOR); /* bucket count needed to hold initial_size entries at the load factor */
+ while (initial_size > size)
+ size <<= HASHMAP_RESIZE_BITS; /* grow in the same steps rehash() uses */
+ alloc_table(map, size);
+}
+
+void hashmap_free(struct hashmap *map, int free_entries)
+{ /* release the bucket array and zero *map; entries are free()d too when free_entries is non-zero */
+ if (!map || !map->table)
+ return; /* NULL or never-allocated map: nothing to do */
+ if (free_entries) {
+ struct hashmap_iter iter;
+ struct hashmap_entry *e;
+
+ hashmap_iter_init(map, &iter);
+ while ((e = hashmap_iter_next(&iter))) /* the iterator has already saved e->next before e is freed */
+ free(e);
+ }
+ free(map->table);
+ memset(map, 0, sizeof(*map));
+}
+
+void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)
+{ /* returns the first entry matching key / keydata, or NULL when there is none */
+ return *find_entry_ptr(map, key, keydata);
+}
+
+void *hashmap_get_next(const struct hashmap *map, const void *entry)
+{ /* returns the next entry after entry in its chain that compares equal to it, or NULL */
+ struct hashmap_entry *e = ((struct hashmap_entry *)entry)->next;
+
+ for (; e; e = e->next)
+ if (entry_equals(map, entry, e, NULL)) /* cmpfn is called with NULL keydata here */
+ return e;
+ return NULL;
+}
+
+void hashmap_add(struct hashmap *map, void *entry)
+{ /* insert entry at the head of its bucket; no check for an existing equal entry is made */
+ unsigned int b = bucket(map, entry);
+
+ /* add entry */
+ ((struct hashmap_entry *)entry)->next = map->table[b];
+ map->table[b] = entry;
+
+ /* fix size and rehash if appropriate */
+ map->size++;
+ if (map->size > map->grow_at)
+ rehash(map, map->tablesize << HASHMAP_RESIZE_BITS); /* grow the table by a factor of 2^HASHMAP_RESIZE_BITS */
+}
+
+void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
+{ /* unlink and return the entry matching key / keydata (it is not freed here), or NULL if none */
+ struct hashmap_entry *old;
+ struct hashmap_entry **e = find_entry_ptr(map, key, keydata);
+
+ if (!*e)
+ return NULL;
+
+ /* remove existing entry */
+ old = *e;
+ *e = old->next; /* make the predecessor link skip the removed entry */
+ old->next = NULL;
+
+ /* fix size and rehash if appropriate */
+ map->size--;
+ if (map->size < map->shrink_at)
+ rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS); /* shrink the table by a factor of 2^HASHMAP_RESIZE_BITS */
+ return old;
+}
+
+void *hashmap_put(struct hashmap *map, void *entry)
+{ /* add entry, first removing one equal entry if present; returns the removed entry or NULL */
+ struct hashmap_entry *old = hashmap_remove(map, entry, NULL);
+
+ hashmap_add(map, entry);
+ return old; /* not freed here */
+}
+
+void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)
+{ /* position iter before the first bucket of map */
+ iter->map = map;
+ iter->tablepos = 0;
+ iter->next = NULL;
+}
+
+void *hashmap_iter_next(struct hashmap_iter *iter)
+{ /* return the next entry of the walk, or NULL once every bucket has been visited */
+ struct hashmap_entry *current = iter->next;
+
+ for (;;) {
+ if (current) {
+ iter->next = current->next; /* saved now, before the caller gets hold of current */
+ return current;
+ }
+
+ if (iter->tablepos >= iter->map->tablesize)
+ return NULL;
+
+ current = iter->map->table[iter->tablepos++]; /* chain exhausted: move on to the next bucket */
+ }
+}
+
+struct pool_entry { /* one interned byte string in memintern()'s pool */
+ struct hashmap_entry ent; /* hashmap header, kept as the first member */
+ size_t len; /* number of bytes in data */
+ unsigned char data[FLEX_ARRAY]; /* the interned bytes, allocated together with the struct */
+};
+
+static int pool_entry_cmp(const struct pool_entry *e1,
+ const struct pool_entry *e2,
+ const unsigned char *keydata)
+{ /* returns 0 (match) when keydata is e1's own buffer, or when lengths and bytes are equal */
+ return e1->data != keydata &&
+ (e1->len != e2->len || memcmp(e1->data, keydata, e1->len)); /* the bytes come from keydata, the length from e2 */
+}
+
+const void *memintern(const void *data, size_t len)
+{ /* return the pool's copy of the len bytes at data, adding a copy on first use */
+ static struct hashmap map; /* function-local pool, created on the first call */
+ struct pool_entry key, *e;
+
+ /* initialize string pool hashmap */
+ if (!map.tablesize)
+ hashmap_init(&map, (hashmap_cmp_fn)pool_entry_cmp, 0);
+
+ /* lookup interned string in pool */
+ hashmap_entry_init(&key, memhash(data, len));
+ key.len = len;
+ e = hashmap_get(&map, &key, data); /* data is the keydata compared by pool_entry_cmp() */
+ if (!e) {
+ /* not found: create it */
+ FLEX_ALLOC_MEM(e, data, data, len); /* allocates e and copies the bytes into e->data */
+ hashmap_entry_init(e, key.ent.hash);
+ e->len = len;
+ hashmap_add(&map, e);
+ }
+ return e->data;
+}
--
2.24.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 3/5] erofs-utils: introduce sha256
2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
2021-09-22 18:56 ` [PATCH v3 2/5] erofs-utils: introduce hashmap from git source Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
2021-09-22 18:56 ` [PATCH v3 4/5] erofs-utils: introduce copy_file_range Gao Xiang
2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao
A simple SHA-256 implementation copied from e2fsprogs.
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
lib/Makefile.am | 2 +-
lib/sha256.c | 248 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 249 insertions(+), 1 deletion(-)
create mode 100644 lib/sha256.c
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 7d00bf5fafdc..2638a109c29c 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -22,7 +22,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
- compress_hints.c hashmap.c
+ compress_hints.c hashmap.c sha256.c
liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
if ENABLE_LZ4
liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/sha256.c b/lib/sha256.c
new file mode 100644
index 000000000000..dd0e058662ff
--- /dev/null
+++ b/lib/sha256.c
@@ -0,0 +1,248 @@
+/*
+ * sha256.c --- The sha256 algorithm
+ *
+ * Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
+ * (copied from libtomcrypt and then relicensed under GPLv2)
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+#include "erofs/defs.h"
+#include <string.h>
+
+/*
+ * SHA-256 round constants: sha256_compress() adds K[i] in round i
+ * (through the RND() macro below).
+ */
+static const __u32 K[64] = {
+ 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL,
+ 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL,
+ 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL,
+ 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+ 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL,
+ 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL,
+ 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL,
+ 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+ 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL,
+ 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL,
+ 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL,
+ 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+ 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+};
+
+/*
+ * Various logical functions.
+ *
+ * S(x, n) rotates a 32-bit value right by n bits (see RORc), R(x, n) is a
+ * plain logical right shift.  Ch/Maj do not parenthesise their arguments,
+ * so only pass simple operands to them.
+ */
+#define Ch(x,y,z) (z ^ (x & (y ^ z)))
+#define Maj(x,y,z) (((x | y) & z) | (x & y))
+#define S(x, n) RORc((x),(n))
+#define R(x, n) (((x)&0xFFFFFFFFUL)>>(n))
+#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
+#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
+#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
+#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+/* rotate the low 32 bits of x right by (y mod 32) bits */
+#define RORc(x, y) ( ((((__u32)(x)&0xFFFFFFFFUL)>>(__u32)((y)&31)) | ((__u32)(x)<<(__u32)(32-((y)&31)))) & 0xFFFFFFFFUL)
+
+/*
+ * One compression round.  Not a self-contained expression: it expands to
+ * several statements and relies on t0, t1 and W[] being declared in the
+ * calling scope (and on the file-scope K[] table).
+ */
+#define RND(a,b,c,d,e,f,g,h,i) \
+ t0 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]; \
+ t1 = Sigma0(a) + Maj(a, b, c); \
+ d += t0; \
+ h = t0 + t1;
+
+/* store the 64-bit value x into the byte array y, most significant byte first */
+#define STORE64H(x, y) \
+ do { \
+ (y)[0] = (unsigned char)(((x)>>56)&255);\
+ (y)[1] = (unsigned char)(((x)>>48)&255);\
+ (y)[2] = (unsigned char)(((x)>>40)&255);\
+ (y)[3] = (unsigned char)(((x)>>32)&255);\
+ (y)[4] = (unsigned char)(((x)>>24)&255);\
+ (y)[5] = (unsigned char)(((x)>>16)&255);\
+ (y)[6] = (unsigned char)(((x)>>8)&255);\
+ (y)[7] = (unsigned char)((x)&255); } while(0)
+
+/* store the 32-bit value x into the byte array y, most significant byte first */
+#define STORE32H(x, y) \
+ do { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255); \
+ (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } while(0)
+
+/* load a 32-bit value into x from the 4 bytes at y, most significant byte first */
+#define LOAD32H(x, y) \
+ do { x = ((__u32)((y)[0] & 255)<<24) | \
+ ((__u32)((y)[1] & 255)<<16) | \
+ ((__u32)((y)[2] & 255)<<8) | \
+ ((__u32)((y)[3] & 255)); } while(0)
+
+/* Running state of one SHA-256 computation. */
+struct sha256_state {
+ /* number of message BITS compressed so far */
+ __u64 length;
+ /* state[]: chaining value; curlen: number of bytes pending in buf[] */
+ __u32 state[8], curlen;
+ /* partial input block waiting to be compressed */
+ unsigned char buf[64];
+};
+
+/* This is a highly simplified version from libtomcrypt */
+struct hash_state {
+ struct sha256_state sha256;
+};
+
+/*
+ * Fold one 64-byte input block at @buf into the chaining value held in
+ * md->sha256.state.  Does not touch md->sha256.length or curlen; the
+ * callers account for those.
+ */
+static void sha256_compress(struct hash_state * md, const unsigned char *buf)
+{
+ __u32 S[8], W[64], t0, t1;
+ __u32 t;
+ int i;
+
+ /* copy state into S */
+ for (i = 0; i < 8; i++) {
+ S[i] = md->sha256.state[i];
+ }
+
+ /* load the 512-bit input block into W[0..15] as big-endian 32-bit words */
+ for (i = 0; i < 16; i++) {
+ LOAD32H(W[i], buf + (4*i));
+ }
+
+ /* fill W[16..63] */
+ for (i = 16; i < 64; i++) {
+ W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
+ }
+
+ /* Compress */
+ for (i = 0; i < 64; ++i) {
+ RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i);
+ /* rotate the working variables by one slot for the next round */
+ t = S[7]; S[7] = S[6]; S[6] = S[5]; S[5] = S[4];
+ S[4] = S[3]; S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = t;
+ }
+
+ /* feedback */
+ for (i = 0; i < 8; i++) {
+ md->sha256.state[i] = md->sha256.state[i] + S[i];
+ }
+}
+
+/* Reset @md so that a new message can be hashed. */
+static void sha256_init(struct hash_state * md)
+{
+	/* initial chaining value */
+	static const __u32 initial_state[8] = {
+		0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+		0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
+	};
+	int i;
+
+	md->sha256.curlen = 0;
+	md->sha256.length = 0;
+	for (i = 0; i < 8; i++)
+		md->sha256.state[i] = initial_state[i];
+}
+
+#define MIN(x, y) ( ((x)<(y))?(x):(y) )
+#define SHA256_BLOCKSIZE 64
+/*
+ * Feed @inlen bytes starting at @in into the hash.  Whole blocks are
+ * compressed straight from the caller's buffer when nothing is pending;
+ * otherwise the input is staged in md->sha256.buf until a block is full.
+ */
+static void sha256_process(struct hash_state * md, const unsigned char *in, unsigned long inlen)
+{
+	while (inlen > 0) {
+		unsigned long take;
+
+		/* fast path: no pending bytes and at least one whole block */
+		if (md->sha256.curlen == 0 && inlen >= SHA256_BLOCKSIZE) {
+			sha256_compress(md, in);
+			md->sha256.length += SHA256_BLOCKSIZE * 8;
+			in += SHA256_BLOCKSIZE;
+			inlen -= SHA256_BLOCKSIZE;
+			continue;
+		}
+
+		/* top up the staging buffer with as much as fits */
+		take = SHA256_BLOCKSIZE - md->sha256.curlen;
+		if (inlen < take)
+			take = inlen;
+		memcpy(md->sha256.buf + md->sha256.curlen, in, (size_t)take);
+		md->sha256.curlen += take;
+		in += take;
+		inlen -= take;
+
+		if (md->sha256.curlen != SHA256_BLOCKSIZE)
+			continue;
+
+		/* staging buffer is full: compress it and start over */
+		sha256_compress(md, md->sha256.buf);
+		md->sha256.length += 8*SHA256_BLOCKSIZE;
+		md->sha256.curlen = 0;
+	}
+}
+
+/*
+ * Finish the hash: append the padding and the message bit length, run
+ * the final compression(s) and write the 32-byte digest to @out.
+ */
+static void sha256_done(struct hash_state * md, unsigned char *out)
+{
+	unsigned int used;
+	int i;
+
+	/* account for the bytes still pending in the buffer */
+	md->sha256.length += md->sha256.curlen * 8;
+
+	/* append the '1' bit */
+	used = md->sha256.curlen;
+	md->sha256.buf[used++] = (unsigned char)0x80;
+
+	/*
+	 * No room left for the 8-byte length field: zero-fill this block,
+	 * compress it, and continue with an empty one.
+	 */
+	if (used > 56) {
+		memset(md->sha256.buf + used, 0, 64 - used);
+		sha256_compress(md, md->sha256.buf);
+		used = 0;
+	}
+
+	/* pad with zeroes up to byte 56 */
+	memset(md->sha256.buf + used, 0, 56 - used);
+	md->sha256.curlen = 56;
+
+	/* store length */
+	STORE64H(md->sha256.length, md->sha256.buf+56);
+	sha256_compress(md, md->sha256.buf);
+
+	/* copy output */
+	for (i = 0; i < 8; i++) {
+		STORE32H(md->sha256.state[i], out+(4*i));
+	}
+}
+
+/*
+ * One-shot SHA-256: hash the @in_size bytes at @in and store the 32-byte
+ * digest in @out.
+ */
+void erofs_sha256(const unsigned char *in, unsigned long in_size,
+		  unsigned char out[32])
+{
+	struct hash_state ctx;
+
+	sha256_init(&ctx);
+	sha256_process(&ctx, in, in_size);
+	sha256_done(&ctx, out);
+}
+
+#ifdef UNITTEST
+/* printf() is only needed by the self-test below */
+#include <stdio.h>
+
+/* known-answer vectors: message and its expected SHA-256 digest */
+static const struct {
+	const char *msg;
+	unsigned char hash[32];
+} tests[] = {
+	{ "",
+	  { 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
+	    0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
+	    0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
+	    0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 }
+	},
+	{ "abc",
+	  { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea,
+	    0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23,
+	    0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c,
+	    0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad }
+	},
+	{ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+	  { 0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8,
+	    0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e, 0x60, 0x39,
+	    0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67,
+	    0xf6, 0xec, 0xed, 0xd4, 0x19, 0xdb, 0x06, 0xc1 }
+	},
+};
+
+/*
+ * Self-test entry point: hashes every vector in tests[] and returns the
+ * number of mismatches (0 means all vectors passed).
+ */
+int main(int argc, char **argv)
+{
+	int i;
+	int errors = 0;
+	unsigned char tmp[32];
+
+	(void)argc;
+	(void)argv;
+
+	for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) {
+		const unsigned char *msg = (const unsigned char *)tests[i].msg;
+		size_t len = strlen(tests[i].msg);
+
+		erofs_sha256(msg, len, tmp);
+		printf("SHA256 test message %d: ", i);
+		if (memcmp(tmp, tests[i].hash, 32) != 0) {
+			printf("FAILED\n");
+			errors++;
+		} else
+			printf("OK\n");
+	}
+	return errors;
+}
+
+#endif /* UNITTEST */
--
2.24.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 4/5] erofs-utils: introduce copy_file_range
2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
` (2 preceding siblings ...)
2021-09-22 18:56 ` [PATCH v3 3/5] erofs-utils: introduce sha256 Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
4 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao
Add copy_file_range support. Fall back to a read/write emulation if
libc doesn't provide it or the call is not implemented (ENOSYS).
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
configure.ac | 1 +
include/erofs/io.h | 5 +++
lib/io.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 101 insertions(+)
diff --git a/configure.ac b/configure.ac
index a749db0aed65..9d7d5c22e53f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -158,6 +158,7 @@ AC_CHECK_DECL(lseek64,[AC_DEFINE(HAVE_LSEEK64_PROTOTYPE, 1,
# Checks for library functions.
AC_CHECK_FUNCS(m4_flatten([
backtrace
+ copy_file_range
fallocate
gettimeofday
lgetxattr
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 0763baf50dc3..2597bf48a1c4 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -7,6 +7,7 @@
#ifndef __EROFS_IO_H
#define __EROFS_IO_H
+#define _GNU_SOURCE
#include <unistd.h>
#include "internal.h"
@@ -24,6 +25,10 @@ int dev_fsync(void);
int dev_resize(erofs_blk_t nblocks);
u64 dev_length(void);
+int erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
+ int fd_out, erofs_off_t *off_out,
+ size_t length);
+
static inline int blk_write(const void *buf, erofs_blk_t blkaddr,
u32 nblocks)
{
diff --git a/lib/io.c b/lib/io.c
index 620cb9c960e1..504a69e4bdc1 100644
--- a/lib/io.c
+++ b/lib/io.c
@@ -258,3 +258,98 @@ int dev_read(void *buf, u64 offset, size_t len)
}
return 0;
}
+
+/*
+ * Userspace emulation of copy_file_range(): copy up to @length bytes from
+ * @fd_in at *@off_in to @fd_out at *@off_out with pread()/pwrite(), and
+ * advance both offsets by the amount actually transferred.
+ *
+ * Returns the number of bytes copied (possibly fewer than @length when
+ * EOF or an error is hit after some progress), or -errno if nothing could
+ * be copied at all.
+ */
+static int __erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
+				   int fd_out, erofs_off_t *off_out,
+				   size_t length)
+{
+	size_t copied = 0;
+	char buf[8192];
+
+	/*
+	 * Main copying loop.  The buffer size is arbitrary and is a
+	 * trade-off between stack size consumption, cache usage, and
+	 * amortization of system call overhead.
+	 */
+	while (length > 0) {
+		size_t to_read;
+		ssize_t read_count;
+		char *end, *p;
+
+		to_read = min_t(size_t, length, sizeof(buf));
+#ifdef HAVE_PREAD64
+		read_count = pread64(fd_in, buf, to_read, *off_in);
+#else
+		read_count = pread(fd_in, buf, to_read, *off_in);
+#endif
+		if (read_count == 0)
+			/* End of file reached prematurely. */
+			return copied;
+		if (read_count < 0) {
+			/* Report the number of bytes copied so far. */
+			if (copied > 0)
+				return copied;
+			/* same -errno convention as the native path */
+			return -errno;
+		}
+		*off_in += read_count;
+
+		/* Write the buffer part which was read to the destination. */
+		end = buf + read_count;
+		for (p = buf; p < end; ) {
+			ssize_t write_count;
+
+#ifdef HAVE_PWRITE64
+			write_count = pwrite64(fd_out, p, end - p, *off_out);
+#else
+			write_count = pwrite(fd_out, p, end - p, *off_out);
+#endif
+			if (write_count < 0) {
+				/*
+				 * Adjust the input read position to match what
+				 * we have written, so that the caller can pick
+				 * up after the error.
+				 */
+				size_t written = p - buf;
+				/*
+				 * NB: This needs to be signed so that we can
+				 * form the negative value below.
+				 */
+				ssize_t overread = read_count - written;
+
+				*off_in -= overread;
+				/* Report the number of bytes copied so far. */
+				if (copied + written > 0)
+					return copied + written;
+				/* same -errno convention as the native path */
+				return -errno;
+			}
+			p += write_count;
+			*off_out += write_count;
+		} /* Write loop. */
+		copied += read_count;
+		length -= read_count;
+	}
+	return copied;
+}
+
+/*
+ * Copy up to @length bytes from @fd_in at *@off_in to @fd_out at *@off_out,
+ * advancing both offsets by the amount transferred.
+ *
+ * Uses copy_file_range(2) when the build detected it and falls back to
+ * the pread()/pwrite() emulation when the call is unavailable for this
+ * pair of files.  Returns the number of bytes copied, which may be fewer
+ * than @length, or a negative value on failure (-errno from the native
+ * path).
+ */
+int erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
+			  int fd_out, erofs_off_t *off_out,
+			  size_t length)
+{
+#ifdef HAVE_COPY_FILE_RANGE
+	off64_t off64_in = *off_in, off64_out = *off_out;
+	ssize_t ret;
+
+	ret = copy_file_range(fd_in, &off64_in, fd_out, &off64_out,
+			      length, 0);
+	if (ret >= 0)
+		goto out;
+	/*
+	 * ENOSYS: the kernel has no such system call.
+	 * EXDEV:  the files live on different filesystems and the running
+	 *         kernel cannot copy across them (the temporary blob file
+	 *         and the output image are typically in that situation).
+	 * Both are handled by the emulation below; anything else is a
+	 * real error.
+	 */
+	if (errno != ENOSYS && errno != EXDEV) {
+		ret = -errno;
+out:
+		*off_in = off64_in;
+		*off_out = off64_out;
+		return ret;
+	}
+#endif
+	return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length);
+}
--
2.24.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files
2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
` (3 preceding siblings ...)
2021-09-22 18:56 ` [PATCH v3 4/5] erofs-utils: introduce copy_file_range Gao Xiang
@ 2021-09-22 18:56 ` Gao Xiang
2021-09-22 19:07 ` Gao Xiang
4 siblings, 1 reply; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 18:56 UTC (permalink / raw)
To: linux-erofs; +Cc: Gao Xiang, Liu Jiang, Liu Bo, Peng Tao
mkfs support for the new chunk-based uncompressed files,
including:
* chunk-based files with 4-byte block address array;
* chunk-based files with 8-byte inode chunk indexes.
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
include/erofs/blobchunk.h | 18 ++++
include/erofs/config.h | 1 +
include/erofs/defs.h | 77 ++++++++++++++
include/erofs/hashtable.h | 77 --------------
include/erofs/internal.h | 1 +
include/erofs/io.h | 2 +
lib/Makefile.am | 2 +-
lib/blobchunk.c | 217 ++++++++++++++++++++++++++++++++++++++
lib/inode.c | 36 +++++--
lib/io.c | 2 +-
man/mkfs.erofs.1 | 3 +
mkfs/main.c | 38 +++++++
12 files changed, 389 insertions(+), 85 deletions(-)
create mode 100644 include/erofs/blobchunk.h
create mode 100644 lib/blobchunk.c
diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h
new file mode 100644
index 000000000000..b418227e0ef8
--- /dev/null
+++ b/include/erofs/blobchunk.h
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/include/erofs/blobchunk.h
+ *
+ * Copyright (C) 2021, Alibaba Cloud
+ */
+#ifndef __EROFS_BLOBCHUNK_H
+#define __EROFS_BLOBCHUNK_H
+
+#include "erofs/internal.h"
+
+/*
+ * Convert the chunk references collected for @inode into on-disk entries
+ * and write them to the device at @off, rounded up to the entry size.
+ */
+int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off);
+/*
+ * Split the source file of @inode into chunks, appending chunks not seen
+ * before to the temporary blob.  Returns 0 or a negative error code.
+ */
+int erofs_blob_write_chunked_file(struct erofs_inode *inode);
+/* Copy the temporary blob into the image; returns 0 or a negative error code. */
+int erofs_blob_remap(void);
+/* Close the temporary blob file and free the chunk hashmap. */
+void erofs_blob_exit(void);
+/* Create the temporary blob file and the chunk hashmap; returns 0 or a negative error code. */
+int erofs_blob_init(void);
+
+#endif
diff --git a/include/erofs/config.h b/include/erofs/config.h
index d5d9b5a751c0..574dd52be12d 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -42,6 +42,7 @@ struct erofs_configure {
bool c_random_pclusterblks;
#endif
char c_timeinherit;
+ char c_chunkbits;
bool c_noinline_data;
#ifdef HAVE_LIBSELINUX
diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index 6e0a7774871c..96bbb6574ff3 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -175,6 +175,83 @@ static inline u32 get_unaligned_le32(const u8 *p)
return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
}
+/**
+ * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
+ * @n - parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * Expands to the index of the highest set bit of @n; both n == 0 and n == 1
+ * yield 0.  @n is expanded once per tested bit, so it must be free of side
+ * effects.
+ */
+#define ilog2(n) \
+( \
+ (n) & (1ULL << 63) ? 63 : \
+ (n) & (1ULL << 62) ? 62 : \
+ (n) & (1ULL << 61) ? 61 : \
+ (n) & (1ULL << 60) ? 60 : \
+ (n) & (1ULL << 59) ? 59 : \
+ (n) & (1ULL << 58) ? 58 : \
+ (n) & (1ULL << 57) ? 57 : \
+ (n) & (1ULL << 56) ? 56 : \
+ (n) & (1ULL << 55) ? 55 : \
+ (n) & (1ULL << 54) ? 54 : \
+ (n) & (1ULL << 53) ? 53 : \
+ (n) & (1ULL << 52) ? 52 : \
+ (n) & (1ULL << 51) ? 51 : \
+ (n) & (1ULL << 50) ? 50 : \
+ (n) & (1ULL << 49) ? 49 : \
+ (n) & (1ULL << 48) ? 48 : \
+ (n) & (1ULL << 47) ? 47 : \
+ (n) & (1ULL << 46) ? 46 : \
+ (n) & (1ULL << 45) ? 45 : \
+ (n) & (1ULL << 44) ? 44 : \
+ (n) & (1ULL << 43) ? 43 : \
+ (n) & (1ULL << 42) ? 42 : \
+ (n) & (1ULL << 41) ? 41 : \
+ (n) & (1ULL << 40) ? 40 : \
+ (n) & (1ULL << 39) ? 39 : \
+ (n) & (1ULL << 38) ? 38 : \
+ (n) & (1ULL << 37) ? 37 : \
+ (n) & (1ULL << 36) ? 36 : \
+ (n) & (1ULL << 35) ? 35 : \
+ (n) & (1ULL << 34) ? 34 : \
+ (n) & (1ULL << 33) ? 33 : \
+ (n) & (1ULL << 32) ? 32 : \
+ (n) & (1ULL << 31) ? 31 : \
+ (n) & (1ULL << 30) ? 30 : \
+ (n) & (1ULL << 29) ? 29 : \
+ (n) & (1ULL << 28) ? 28 : \
+ (n) & (1ULL << 27) ? 27 : \
+ (n) & (1ULL << 26) ? 26 : \
+ (n) & (1ULL << 25) ? 25 : \
+ (n) & (1ULL << 24) ? 24 : \
+ (n) & (1ULL << 23) ? 23 : \
+ (n) & (1ULL << 22) ? 22 : \
+ (n) & (1ULL << 21) ? 21 : \
+ (n) & (1ULL << 20) ? 20 : \
+ (n) & (1ULL << 19) ? 19 : \
+ (n) & (1ULL << 18) ? 18 : \
+ (n) & (1ULL << 17) ? 17 : \
+ (n) & (1ULL << 16) ? 16 : \
+ (n) & (1ULL << 15) ? 15 : \
+ (n) & (1ULL << 14) ? 14 : \
+ (n) & (1ULL << 13) ? 13 : \
+ (n) & (1ULL << 12) ? 12 : \
+ (n) & (1ULL << 11) ? 11 : \
+ (n) & (1ULL << 10) ? 10 : \
+ (n) & (1ULL << 9) ? 9 : \
+ (n) & (1ULL << 8) ? 8 : \
+ (n) & (1ULL << 7) ? 7 : \
+ (n) & (1ULL << 6) ? 6 : \
+ (n) & (1ULL << 5) ? 5 : \
+ (n) & (1ULL << 4) ? 4 : \
+ (n) & (1ULL << 3) ? 3 : \
+ (n) & (1ULL << 2) ? 2 : \
+ (n) & (1ULL << 1) ? 1 : 0 \
+)
+
#ifndef __always_inline
#define __always_inline inline
#endif
diff --git a/include/erofs/hashtable.h b/include/erofs/hashtable.h
index a71cb0044816..90eb84ee8598 100644
--- a/include/erofs/hashtable.h
+++ b/include/erofs/hashtable.h
@@ -262,83 +262,6 @@ static __always_inline u32 hash_64(u64 val, unsigned int bits)
#endif
}
-/**
- * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
- * @n - parameter
- *
- * constant-capable log of base 2 calculation
- * - this can be used to initialise global variables from constant data, hence
- * the massive ternary operator construction
- *
- * selects the appropriately-sized optimised version depending on sizeof(n)
- */
-#define ilog2(n) \
-( \
- (n) & (1ULL << 63) ? 63 : \
- (n) & (1ULL << 62) ? 62 : \
- (n) & (1ULL << 61) ? 61 : \
- (n) & (1ULL << 60) ? 60 : \
- (n) & (1ULL << 59) ? 59 : \
- (n) & (1ULL << 58) ? 58 : \
- (n) & (1ULL << 57) ? 57 : \
- (n) & (1ULL << 56) ? 56 : \
- (n) & (1ULL << 55) ? 55 : \
- (n) & (1ULL << 54) ? 54 : \
- (n) & (1ULL << 53) ? 53 : \
- (n) & (1ULL << 52) ? 52 : \
- (n) & (1ULL << 51) ? 51 : \
- (n) & (1ULL << 50) ? 50 : \
- (n) & (1ULL << 49) ? 49 : \
- (n) & (1ULL << 48) ? 48 : \
- (n) & (1ULL << 47) ? 47 : \
- (n) & (1ULL << 46) ? 46 : \
- (n) & (1ULL << 45) ? 45 : \
- (n) & (1ULL << 44) ? 44 : \
- (n) & (1ULL << 43) ? 43 : \
- (n) & (1ULL << 42) ? 42 : \
- (n) & (1ULL << 41) ? 41 : \
- (n) & (1ULL << 40) ? 40 : \
- (n) & (1ULL << 39) ? 39 : \
- (n) & (1ULL << 38) ? 38 : \
- (n) & (1ULL << 37) ? 37 : \
- (n) & (1ULL << 36) ? 36 : \
- (n) & (1ULL << 35) ? 35 : \
- (n) & (1ULL << 34) ? 34 : \
- (n) & (1ULL << 33) ? 33 : \
- (n) & (1ULL << 32) ? 32 : \
- (n) & (1ULL << 31) ? 31 : \
- (n) & (1ULL << 30) ? 30 : \
- (n) & (1ULL << 29) ? 29 : \
- (n) & (1ULL << 28) ? 28 : \
- (n) & (1ULL << 27) ? 27 : \
- (n) & (1ULL << 26) ? 26 : \
- (n) & (1ULL << 25) ? 25 : \
- (n) & (1ULL << 24) ? 24 : \
- (n) & (1ULL << 23) ? 23 : \
- (n) & (1ULL << 22) ? 22 : \
- (n) & (1ULL << 21) ? 21 : \
- (n) & (1ULL << 20) ? 20 : \
- (n) & (1ULL << 19) ? 19 : \
- (n) & (1ULL << 18) ? 18 : \
- (n) & (1ULL << 17) ? 17 : \
- (n) & (1ULL << 16) ? 16 : \
- (n) & (1ULL << 15) ? 15 : \
- (n) & (1ULL << 14) ? 14 : \
- (n) & (1ULL << 13) ? 13 : \
- (n) & (1ULL << 12) ? 12 : \
- (n) & (1ULL << 11) ? 11 : \
- (n) & (1ULL << 10) ? 10 : \
- (n) & (1ULL << 9) ? 9 : \
- (n) & (1ULL << 8) ? 8 : \
- (n) & (1ULL << 7) ? 7 : \
- (n) & (1ULL << 6) ? 6 : \
- (n) & (1ULL << 5) ? 5 : \
- (n) & (1ULL << 4) ? 4 : \
- (n) & (1ULL << 3) ? 3 : \
- (n) & (1ULL << 2) ? 2 : \
- (n) & (1ULL << 1) ? 1 : 0 \
-)
-
#define DEFINE_HASHTABLE(name, bits) \
struct hlist_head name[1 << (bits)] = \
{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 8621f3426410..8b154edb9f88 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -165,6 +165,7 @@ struct erofs_inode {
union {
void *compressmeta;
+ void *chunkindexes;
struct {
uint16_t z_advise;
uint8_t z_algorithmtype[2];
diff --git a/include/erofs/io.h b/include/erofs/io.h
index 2597bf48a1c4..2597c5c0eb96 100644
--- a/include/erofs/io.h
+++ b/include/erofs/io.h
@@ -25,6 +25,8 @@ int dev_fsync(void);
int dev_resize(erofs_blk_t nblocks);
u64 dev_length(void);
+extern int erofs_devfd;
+
int erofs_copy_file_range(int fd_in, erofs_off_t *off_in,
int fd_out, erofs_off_t *off_out,
size_t length);
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 2638a109c29c..b64d90b3e144 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -22,7 +22,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
- compress_hints.c hashmap.c sha256.c
+ compress_hints.c hashmap.c sha256.c blobchunk.c
liberofs_la_CFLAGS = -Wall -Werror -I$(top_srcdir)/include
if ENABLE_LZ4
liberofs_la_CFLAGS += ${LZ4_CFLAGS}
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
new file mode 100644
index 000000000000..e05d0cb08252
--- /dev/null
+++ b/lib/blobchunk.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * erofs-utils/lib/blobchunk.c
+ *
+ * Copyright (C) 2021, Alibaba Cloud
+ */
+#define _GNU_SOURCE
+#include "erofs/hashmap.h"
+#include "erofs/blobchunk.h"
+#include "erofs/cache.h"
+#include "erofs/io.h"
+#include <unistd.h>
+
+void erofs_sha256(const unsigned char *in, unsigned long in_size,
+ unsigned char out[32]);
+
+/* One deduplicated chunk stored in the temporary blob file. */
+struct erofs_blobchunk {
+ /* hashmap linkage; hashed on the SHA-256 digest below */
+ struct hashmap_entry ent;
+ /* SHA-256 digest of the chunk contents, used as the lookup key */
+ char sha256[32];
+ /* chunk length in bytes as passed to erofs_blob_getchunk() */
+ unsigned int chunksize;
+ /* block number of the chunk inside the blob file (before remapping) */
+ erofs_blk_t blkaddr;
+};
+
+/* all chunks written so far, keyed by digest */
+static struct hashmap blob_hashmap;
+/* temporary file that accumulates the unique chunk data */
+static FILE *blobfile;
+/* block number in the image where the blob starts; set by erofs_blob_remap() */
+static erofs_blk_t remapped_base;
+
+/*
+ * Read the next @chunksize bytes from @fd and return the blob chunk that
+ * holds this content.  A chunk with the same SHA-256 digest is reused if
+ * one was written before; otherwise the data is appended to the blob file,
+ * zero-padded to a block boundary, and a new chunk is registered.
+ *
+ * Returns the chunk, or an ERR_PTR(): -ENOMEM on allocation failure,
+ * -errno if read() fails, -EIO on a short read, -ENOSPC if the blob file
+ * cannot be written.
+ */
+static struct erofs_blobchunk *erofs_blob_getchunk(int fd,
+		unsigned int chunksize)
+{
+	static u8 zeroed[EROFS_BLKSIZ];
+	u8 *chunkdata, sha256[32];
+	ssize_t nread;
+	int ret;
+	unsigned int hash;
+	erofs_off_t blkpos;
+	struct erofs_blobchunk *chunk;
+
+	chunkdata = malloc(chunksize);
+	if (!chunkdata)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Keep the result signed: comparing an int -1 against the unsigned
+	 * chunksize would convert it to UINT_MAX and hide the error.
+	 */
+	nread = read(fd, chunkdata, chunksize);
+	if (nread < 0) {
+		chunk = ERR_PTR(-errno);
+		goto out;
+	}
+	if ((size_t)nread < chunksize) {
+		chunk = ERR_PTR(-EIO);
+		goto out;
+	}
+	erofs_sha256(chunkdata, chunksize, sha256);
+	hash = memhash(sha256, sizeof(sha256));
+	chunk = hashmap_get_from_hash(&blob_hashmap, hash, sha256);
+	if (chunk) {
+		/* identical content already stored: reuse it */
+		DBG_BUGON(chunksize != chunk->chunksize);
+		goto out;
+	}
+	chunk = malloc(sizeof(struct erofs_blobchunk));
+	if (!chunk) {
+		chunk = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	chunk->chunksize = chunksize;
+	blkpos = ftell(blobfile);
+	DBG_BUGON(erofs_blkoff(blkpos));
+	chunk->blkaddr = erofs_blknr(blkpos);
+	memcpy(chunk->sha256, sha256, sizeof(sha256));
+	hashmap_entry_init(&chunk->ent, hash);
+	hashmap_add(&blob_hashmap, chunk);
+
+	erofs_dbg("Writing chunk (%u bytes) to %u", chunksize, chunk->blkaddr);
+	ret = fwrite(chunkdata, chunksize, 1, blobfile);
+	/* pad the tail so that the next chunk starts on a block boundary */
+	if (ret == 1 && erofs_blkoff(chunksize))
+		ret = fwrite(zeroed, EROFS_BLKSIZ - erofs_blkoff(chunksize),
+			     1, blobfile);
+	if (ret < 1) {
+		struct hashmap_entry key;
+
+		/* unregister the chunk again and release it */
+		hashmap_entry_init(&key, hash);
+		hashmap_remove(&blob_hashmap, &key, sha256);
+		free(chunk);
+		chunk = ERR_PTR(-ENOSPC);
+		goto out;
+	}
+out:
+	free(chunkdata);
+	return chunk;
+}
+
+/*
+ * hashmap comparison callback: compares the digest of entry @a with @key
+ * when a key is supplied, otherwise with the digest of entry @b.
+ * Returns 0 when the digests are equal.
+ */
+static int erofs_blob_hashmap_cmp(const void *a, const void *b,
+		const void *key)
+{
+	const struct erofs_blobchunk *lhs, *rhs;
+	const void *digest = key;
+
+	lhs = container_of((struct hashmap_entry *)a,
+			   struct erofs_blobchunk, ent);
+	rhs = container_of((struct hashmap_entry *)b,
+			   struct erofs_blobchunk, ent);
+
+	if (!digest)
+		digest = rhs->sha256;
+	return memcmp(lhs->sha256, digest, sizeof(lhs->sha256));
+}
+
+/*
+ * Convert, in place, the chunk pointers stored in inode->chunkindexes into
+ * on-disk entries (chunk indexes or 4-byte block addresses, depending on
+ * inode->u.chunkformat) and write the resulting inode->extent_isize bytes
+ * to the device at @off rounded up to the entry size.
+ *
+ * Returns the result of dev_write().
+ */
+int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
+		erofs_off_t off)
+{
+	struct erofs_inode_chunk_index idx = {0};
+	char *base = inode->chunkindexes;
+	unsigned int dst, src, unit;
+
+	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
+		unit = sizeof(struct erofs_inode_chunk_index);
+	else
+		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
+
+	/*
+	 * erofs_blob_write_chunked_file() stores one pointer per
+	 * sizeof(struct erofs_inode_chunk_index) slot, so read them back
+	 * with that stride rather than sizeof(void *); the two differ on
+	 * 32-bit hosts.  As long as an output entry is no larger than a
+	 * slot, dst never overtakes src and the in-place rewrite cannot
+	 * clobber a pointer that has not been read yet.
+	 */
+	for (dst = src = 0; dst < inode->extent_isize;
+	     src += sizeof(struct erofs_inode_chunk_index), dst += unit) {
+		struct erofs_blobchunk *chunk;
+
+		chunk = *(void **)(base + src);
+
+		/*
+		 * NOTE(review): blkaddr is stored in host byte order here;
+		 * confirm whether cpu_to_le32() is needed on big-endian hosts.
+		 */
+		idx.blkaddr = chunk->blkaddr + remapped_base;
+		if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE)
+			memcpy(base + dst, &idx.blkaddr, unit);
+		else
+			memcpy(base + dst, &idx, sizeof(idx));
+	}
+	off = roundup(off, unit);
+
+	return dev_write(inode->chunkindexes, off, inode->extent_isize);
+}
+
+/*
+ * Split the source file of @inode into chunks of (1 << cfg.c_chunkbits)
+ * bytes, resolve each one through erofs_blob_getchunk() and remember the
+ * resulting chunk pointers in inode->chunkindexes for the later call to
+ * erofs_blob_write_chunk_indexes().
+ *
+ * Returns 0 on success (the inode becomes EROFS_INODE_CHUNK_BASED) or a
+ * negative error code, in which case inode->chunkindexes is released.
+ */
+int erofs_blob_write_chunked_file(struct erofs_inode *inode)
+{
+	unsigned int chunksize = 1 << cfg.c_chunkbits;
+	unsigned int count = DIV_ROUND_UP(inode->i_size, chunksize);
+	struct erofs_inode_chunk_index *slot;
+	erofs_off_t pos, len;
+	unsigned int entsize;
+	int fd, ret;
+
+	inode->u.chunkformat |= inode->u.chunkbits - LOG_BLOCK_SIZE;
+
+	entsize = (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) ?
+		sizeof(struct erofs_inode_chunk_index) :
+		EROFS_BLOCK_MAP_ENTRY_SIZE;
+
+	inode->extent_isize = count * entsize;
+	/* each slot must be able to hold a pointer first, an entry later */
+	slot = malloc(count * max(sizeof(*slot), sizeof(void *)));
+	if (!slot)
+		return -ENOMEM;
+	inode->chunkindexes = slot;
+
+	fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
+	if (fd < 0) {
+		ret = -errno;
+		goto err_free;
+	}
+
+	pos = 0;
+	while (pos < inode->i_size) {
+		struct erofs_blobchunk *chunk;
+
+		/* the last chunk may be shorter than chunksize */
+		len = min_t(u64, inode->i_size - pos, chunksize);
+		chunk = erofs_blob_getchunk(fd, len);
+		if (IS_ERR(chunk)) {
+			ret = PTR_ERR(chunk);
+			goto err_close;
+		}
+		*(void **)slot++ = chunk;
+		pos += len;
+	}
+	inode->datalayout = EROFS_INODE_CHUNK_BASED;
+	close(fd);
+	return 0;
+
+err_close:
+	close(fd);
+err_free:
+	free(inode->chunkindexes);
+	inode->chunkindexes = NULL;
+	return ret;
+}
+
+/*
+ * Copy the whole temporary blob file into a freshly allocated data area
+ * of the image and record where it starts in remapped_base.
+ *
+ * Returns 0 on success, -errno if the blob file cannot be flushed or
+ * measured, the erofs_balloc() error if no space can be allocated, or
+ * -EIO if the blob could not be copied completely.
+ */
+int erofs_blob_remap(void)
+{
+	struct erofs_buffer_head *bh;
+	ssize_t length, remaining;
+	erofs_off_t pos_in, pos_out;
+	int ret;
+
+	if (fflush(blobfile))
+		return -errno;
+	length = ftell(blobfile);
+	if (length < 0)
+		return -errno;
+	bh = erofs_balloc(DATA, length, 0, 0);
+	if (IS_ERR(bh))
+		return PTR_ERR(bh);
+
+	erofs_mapbh(bh->block);
+	pos_out = erofs_btell(bh, false);
+	pos_in = 0;
+	remapped_base = erofs_blknr(pos_out);
+
+	/*
+	 * A single call may legitimately copy fewer bytes than requested,
+	 * and its int return value cannot describe more than INT_MAX
+	 * bytes, so copy in bounded steps until everything is transferred.
+	 */
+	for (remaining = length; remaining > 0; remaining -= ret) {
+		size_t step = remaining;
+
+		if (step > ((size_t)1 << 30))
+			step = (size_t)1 << 30;
+		ret = erofs_copy_file_range(fileno(blobfile), &pos_in,
+					    erofs_devfd, &pos_out, step);
+		if (ret <= 0)
+			break;
+	}
+	bh->op = &erofs_skip_write_bhops;
+	erofs_bdrop(bh, false);
+	return remaining > 0 ? -EIO : 0;
+}
+
+/* Release the blob state: close the temporary file and free the hashmap. */
+void erofs_blob_exit(void)
+{
+	/* blobfile is NULL if erofs_blob_init() failed to create it */
+	if (blobfile != NULL) {
+		fclose(blobfile);
+	}
+
+	/* the second argument asks hashmap_free() to free the entries too */
+	hashmap_free(&blob_hashmap, 1);
+}
+
+/*
+ * Create the temporary blob file and initialise the chunk hashmap.
+ * Returns 0 on success or -ENOMEM if the temporary file cannot be created.
+ */
+int erofs_blob_init(void)
+{
+ blobfile = tmpfile64();
+ if (!blobfile)
+ return -ENOMEM;
+
+ hashmap_init(&blob_hashmap, erofs_blob_hashmap_cmp, 0);
+ return 0;
+}
diff --git a/lib/inode.c b/lib/inode.c
index 4c40c348aa4b..26ffa4b2bb38 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -24,6 +24,7 @@
#include "erofs/exclude.h"
#include "erofs/block_list.h"
#include "erofs/compress_hints.h"
+#include "erofs/blobchunk.h"
#define S_SHIFT 12
static unsigned char erofs_ftype_by_mode[S_IFMT >> S_SHIFT] = {
@@ -387,6 +388,12 @@ int erofs_write_file(struct erofs_inode *inode)
return 0;
}
+ if (cfg.c_chunkbits) {
+ inode->u.chunkbits = cfg.c_chunkbits;
+ inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES;
+ return erofs_blob_write_chunked_file(inode);
+ }
+
if (cfg.c_compr_alg_master && erofs_file_is_compressible(inode)) {
ret = erofs_write_compressed_file(inode);
@@ -440,6 +447,10 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
if (is_inode_layout_compression(inode))
u.dic.i_u.compressed_blocks =
cpu_to_le32(inode->u.i_blocks);
+ else if (inode->datalayout ==
+ EROFS_INODE_CHUNK_BASED)
+ u.dic.i_u.c.format =
+ cpu_to_le16(inode->u.chunkformat);
else
u.dic.i_u.raw_blkaddr =
cpu_to_le32(inode->u.i_blkaddr);
@@ -473,6 +484,10 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
if (is_inode_layout_compression(inode))
u.die.i_u.compressed_blocks =
cpu_to_le32(inode->u.i_blocks);
+ else if (inode->datalayout ==
+ EROFS_INODE_CHUNK_BASED)
+ u.die.i_u.c.format =
+ cpu_to_le16(inode->u.chunkformat);
else
u.die.i_u.raw_blkaddr =
cpu_to_le32(inode->u.i_blkaddr);
@@ -505,12 +520,19 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
}
if (inode->extent_isize) {
- /* write compression metadata */
- off = Z_EROFS_VLE_EXTENT_ALIGN(off);
- ret = dev_write(inode->compressmeta, off, inode->extent_isize);
- if (ret)
- return false;
- free(inode->compressmeta);
+ if (inode->datalayout == EROFS_INODE_CHUNK_BASED) {
+ ret = erofs_blob_write_chunk_indexes(inode, off);
+ if (ret)
+ return false;
+ } else {
+ /* write compression metadata */
+ off = Z_EROFS_VLE_EXTENT_ALIGN(off);
+ ret = dev_write(inode->compressmeta, off,
+ inode->extent_isize);
+ if (ret)
+ return false;
+ free(inode->compressmeta);
+ }
}
inode->bh = NULL;
@@ -565,6 +587,8 @@ static int erofs_prepare_inode_buffer(struct erofs_inode *inode)
if (is_inode_layout_compression(inode))
goto noinline;
+ if (inode->datalayout == EROFS_INODE_CHUNK_BASED)
+ goto noinline;
if (cfg.c_noinline_data && S_ISREG(inode->i_mode)) {
inode->datalayout = EROFS_INODE_FLAT_PLAIN;
diff --git a/lib/io.c b/lib/io.c
index 504a69e4bdc1..03c7e3355089 100644
--- a/lib/io.c
+++ b/lib/io.c
@@ -24,7 +24,7 @@
#include "erofs/print.h"
static const char *erofs_devname;
-static int erofs_devfd = -1;
+int erofs_devfd = -1;
static u64 erofs_devsz;
int dev_get_blkdev_size(int fd, u64 *bytes)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 1446cb56db30..3c250c118168 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -83,6 +83,9 @@ Set all file gids to \fIGID\fR.
.B \-\-all-root
Make all files owned by root.
.TP
+.BI "\-\-chunksize " #
+Generate chunk-based files with #-byte chunks.
+.TP
.B \-\-help
Display this help and exit.
.TP
diff --git a/mkfs/main.c b/mkfs/main.c
index addefcefea38..b61205dac91a 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -22,6 +22,7 @@
#include "erofs/exclude.h"
#include "erofs/block_list.h"
#include "erofs/compress_hints.h"
+#include "erofs/blobchunk.h"
#ifdef HAVE_LIBUUID
#include <uuid.h>
@@ -44,6 +45,7 @@ static struct option long_options[] = {
#endif
{"max-extent-bytes", required_argument, NULL, 9},
{"compress-hints", required_argument, NULL, 10},
+ {"chunksize", required_argument, NULL, 11},
#ifdef WITH_ANDROID
{"mount-point", required_argument, NULL, 512},
{"product-out", required_argument, NULL, 513},
@@ -79,6 +81,7 @@ static void usage(void)
#ifdef HAVE_LIBUUID
" -UX use a given filesystem UUID\n"
#endif
+ " --chunksize=X generate chunk-based files with X-byte chunks\n"
" --exclude-path=X avoid including file X (X = exact literal path)\n"
" --exclude-regex=X avoid including files that match X (X = regular expression)\n"
#ifdef HAVE_LIBSELINUX
@@ -321,6 +324,26 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
cfg.c_pclusterblks_max = i / EROFS_BLKSIZ;
cfg.c_pclusterblks_def = cfg.c_pclusterblks_max;
break;
+ case 11:
+ i = strtol(optarg, &endptr, 0);
+ if (*endptr != '\0') {
+ erofs_err("invalid chunksize %s", optarg);
+ return -EINVAL;
+ }
+ cfg.c_chunkbits = ilog2(i);
+ if ((1 << cfg.c_chunkbits) != i) {
+ erofs_err("chunksize %s must be a power of two",
+ optarg);
+ return -EINVAL;
+ }
+ if (i < EROFS_BLKSIZ) {
+ erofs_err("chunksize %s must be larger than block size",
+ optarg);
+ return -EINVAL;
+ }
+ erofs_sb_set_chunked_file();
+ erofs_warn("EXPERIMENTAL chunked file feature in use. Use at your own risk!");
+ break;
case 1:
usage();
@@ -528,6 +551,12 @@ int main(int argc, char **argv)
return 1;
}
+ if (cfg.c_chunkbits) {
+ err = erofs_blob_init();
+ if (err)
+ return 1;
+ }
+
err = lstat64(cfg.c_src_path, &st);
if (err)
return 1;
@@ -622,6 +651,13 @@ int main(int argc, char **argv)
root_nid = erofs_lookupnid(root_inode);
erofs_iput(root_inode);
+ if (cfg.c_chunkbits) {
+ erofs_info("total metadata: %u blocks", erofs_mapbh(NULL));
+ err = erofs_blob_remap();
+ if (err)
+ goto exit;
+ }
+
err = erofs_mkfs_update_super_block(sb_bh, root_nid, &nblocks);
if (err)
goto exit;
@@ -642,6 +678,8 @@ exit:
dev_close();
erofs_cleanup_compress_hints();
erofs_cleanup_exclude_rules();
+ if (cfg.c_chunkbits)
+ erofs_blob_exit();
erofs_exit_configure();
if (err) {
--
2.24.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files
2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
@ 2021-09-22 19:07 ` Gao Xiang
0 siblings, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2021-09-22 19:07 UTC (permalink / raw)
To: linux-erofs; +Cc: Liu Jiang, Liu Bo, Peng Tao
On Thu, Sep 23, 2021 at 02:56:07AM +0800, Gao Xiang wrote:
> mkfs support for the new chunk-based uncompressed files,
> including:
> * chunk-based files with 4-byte block address array;
> * chunk-based files with 8-byte inode chunk indexes.
>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
> include/erofs/blobchunk.h | 18 ++++
> include/erofs/config.h | 1 +
> include/erofs/defs.h | 77 ++++++++++++++
> include/erofs/hashtable.h | 77 --------------
> include/erofs/internal.h | 1 +
> include/erofs/io.h | 2 +
> lib/Makefile.am | 2 +-
> lib/blobchunk.c | 217 ++++++++++++++++++++++++++++++++++++++
> lib/inode.c | 36 +++++--
> lib/io.c | 2 +-
> man/mkfs.erofs.1 | 3 +
> mkfs/main.c | 38 +++++++
> 12 files changed, 389 insertions(+), 85 deletions(-)
> create mode 100644 include/erofs/blobchunk.h
> create mode 100644 lib/blobchunk.c
>
Applying the following diff to fix up the macOS build:
diff --git a/configure.ac b/configure.ac
index 9d7d5c2..03387f5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -170,7 +170,8 @@ AC_CHECK_FUNCS(m4_flatten([
strdup
strerror
strrchr
- strtoull]))
+ strtoull
+ tmpfile64]))
# Configure debug mode
AS_IF([test "x$enable_debug" != "xno"], [], [
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index e05d0cb..725b517 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -208,7 +208,11 @@ void erofs_blob_exit(void)
int erofs_blob_init(void)
{
+#ifdef HAVE_TMPFILE64
blobfile = tmpfile64();
+#else
+ blobfile = tmpfile();
+#endif
if (!blobfile)
return -ENOMEM;
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2021-09-22 19:07 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-22 18:56 [PATCH v3 0/5] erofs-utils: add support for chunk-based files Gao Xiang
2021-09-22 18:56 ` [PATCH v3 1/5] erofs-utils: fuse: support reading chunk-based uncompressed files Gao Xiang
2021-09-22 18:56 ` [PATCH v3 2/5] erofs-utils: introduce hashmap from git source Gao Xiang
2021-09-22 18:56 ` [PATCH v3 3/5] erofs-utils: introduce sha256 Gao Xiang
2021-09-22 18:56 ` [PATCH v3 4/5] erofs-utils: introduce copy_file_range Gao Xiang
2021-09-22 18:56 ` [PATCH v3 5/5] erofs-utils: mkfs: support chunk-based uncompressed files Gao Xiang
2021-09-22 19:07 ` Gao Xiang
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.