* [PATCH v3 1/8] staging: erofs: add compacted ondisk compression indexes
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:22 ` [PATCH v3 2/8] staging: erofs: add compacted compression indexes support Gao Xiang
` (7 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
This patch introduces new compacted compression indexes.
In contract to legacy compression indexes that
each 4k logical cluster has an 8-byte index,
compacted ondisk compression indexes will have
amortized 2 bytes for each 4k logical cluster (compacted 2B)
amortized 4 bytes for each 4k logical cluster (compacted 4B)
In detail, several continuous clusters will be encoded in
a compacted pack with cluster types, offsets, and one blkaddr
at the end of the pack to leave 4-byte margin for better
decoding performance, as illustrated below:
_____________________________________________
|___@_____ encoded bits __________|_ blkaddr _|
0 . amortized * vcnt
. .
. . amortized * vcnt - 4
. .
.___________________.
|_type_|_clusterofs_|
Note that compacted 2 / 4B should be aligned with 32 / 8 bytes
in order to avoid each pack crossing page boundary.
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
change log v3:
- wrap up offset for better readability pointed out by Chao.
drivers/staging/erofs/data.c | 4 +-
drivers/staging/erofs/erofs_fs.h | 65 ++++++++++++++++++++++++-------
drivers/staging/erofs/inode.c | 5 +--
drivers/staging/erofs/internal.h | 11 ++----
drivers/staging/erofs/unzip_vle.c | 8 ++--
5 files changed, 62 insertions(+), 31 deletions(-)
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c
index 746685f90564..cc31c3e5984c 100644
--- a/drivers/staging/erofs/data.c
+++ b/drivers/staging/erofs/data.c
@@ -124,7 +124,7 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
- lastblk = nblocks - is_inode_layout_inline(inode);
+ lastblk = nblocks - is_inode_flat_inline(inode);
if (unlikely(offset >= inode->i_size)) {
/* leave out-of-bound access unmapped */
@@ -139,7 +139,7 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
if (offset < blknr_to_addr(lastblk)) {
map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
map->m_plen = blknr_to_addr(lastblk) - offset;
- } else if (is_inode_layout_inline(inode)) {
+ } else if (is_inode_flat_inline(inode)) {
/* 2 - inode inline B: inode, [xattrs], inline last blk... */
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
diff --git a/drivers/staging/erofs/erofs_fs.h b/drivers/staging/erofs/erofs_fs.h
index 8ddb2b3e7d39..9a9aaf2d9fbb 100644
--- a/drivers/staging/erofs/erofs_fs.h
+++ b/drivers/staging/erofs/erofs_fs.h
@@ -49,19 +49,29 @@ struct erofs_super_block {
* erofs inode data mapping:
* 0 - inode plain without inline data A:
* inode, [xattrs], ... | ... | no-holed data
- * 1 - inode VLE compression B:
+ * 1 - inode VLE compression B (legacy):
* inode, [xattrs], extents ... | ...
* 2 - inode plain with inline data C:
* inode, [xattrs], last_inline_data, ... | ... | no-holed data
- * 3~7 - reserved
+ * 3 - inode compression D:
+ * inode, [xattrs], map_header, extents ... | ...
+ * 4~7 - reserved
*/
enum {
- EROFS_INODE_LAYOUT_PLAIN,
- EROFS_INODE_LAYOUT_COMPRESSION,
- EROFS_INODE_LAYOUT_INLINE,
+ EROFS_INODE_FLAT_PLAIN,
+ EROFS_INODE_FLAT_COMPRESSION_LEGACY,
+ EROFS_INODE_FLAT_INLINE,
+ EROFS_INODE_FLAT_COMPRESSION,
EROFS_INODE_LAYOUT_MAX
};
+static bool erofs_inode_is_data_compressed(unsigned int datamode)
+{
+ if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+ return true;
+ return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+}
+
/* bit definitions of inode i_advise */
#define EROFS_I_VERSION_BITS 1
#define EROFS_I_DATA_MAPPING_BITS 3
@@ -176,11 +186,39 @@ struct erofs_xattr_entry {
sizeof(struct erofs_xattr_entry) + \
(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
-/* have to be aligned with 8 bytes on disk */
-struct erofs_extent_header {
- __le32 eh_checksum;
- __le32 eh_reserved[3];
-} __packed;
+/* available compression algorithm types */
+enum {
+ Z_EROFS_COMPRESSION_LZ4,
+ Z_EROFS_COMPRESSION_MAX
+};
+
+/*
+ * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
+ * e.g. for 4k logical cluster size, 4B if compacted 2B is off;
+ * (4B) + 2B + (4B) if compacted 2B is on.
+ */
+#define Z_EROFS_ADVISE_COMPACTED_2B_BIT 0
+
+#define Z_EROFS_ADVISE_COMPACTED_2B (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT)
+
+struct z_erofs_map_header {
+ __le32 h_reserved1;
+ __le16 h_advise;
+ /*
+ * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
+ * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
+ */
+ __u8 h_algorithmtype;
+ /*
+ * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
+ * bit 3-4 : (physical - logical) cluster bits of head 1:
+ * For example, if logical clustersize = 4096, 1 for 8192.
+ * bit 5-7 : (physical - logical) cluster bits of head 2.
+ */
+ __u8 h_clusterbits;
+};
+
+#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8
/*
* Z_EROFS Variable-sized Logical Extent cluster type:
@@ -236,8 +274,9 @@ struct z_erofs_vle_decompressed_index {
} di_u __packed; /* 8 bytes */
} __packed;
-#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \
- sizeof(struct z_erofs_vle_decompressed_index))
+#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
+ (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
+ sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
/* dirent sorts in alphabet order, thus we can do binary search */
struct erofs_dirent {
@@ -270,7 +309,7 @@ static inline void erofs_check_ondisk_layout_definitions(void)
BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64);
BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
- BUILD_BUG_ON(sizeof(struct erofs_extent_header) != 16);
+ BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
index e51348f7e838..3539290b8e45 100644
--- a/drivers/staging/erofs/inode.c
+++ b/drivers/staging/erofs/inode.c
@@ -127,12 +127,9 @@ static int fill_inline_data(struct inode *inode, void *data,
{
struct erofs_vnode *vi = EROFS_V(inode);
struct erofs_sb_info *sbi = EROFS_I_SB(inode);
- const int mode = vi->datamode;
-
- DBG_BUGON(mode >= EROFS_INODE_LAYOUT_MAX);
/* should be inode inline C */
- if (mode != EROFS_INODE_LAYOUT_INLINE)
+ if (!is_inode_flat_inline(inode))
return 0;
/* fast symlink (following ext4) */
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index 1666cceecb3c..c851d0be6cf6 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -382,19 +382,14 @@ static inline unsigned long inode_datablocks(struct inode *inode)
return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
}
-static inline bool is_inode_layout_plain(struct inode *inode)
-{
- return EROFS_V(inode)->datamode == EROFS_INODE_LAYOUT_PLAIN;
-}
-
static inline bool is_inode_layout_compression(struct inode *inode)
{
- return EROFS_V(inode)->datamode == EROFS_INODE_LAYOUT_COMPRESSION;
+ return erofs_inode_is_data_compressed(EROFS_V(inode)->datamode);
}
-static inline bool is_inode_layout_inline(struct inode *inode)
+static inline bool is_inode_flat_inline(struct inode *inode)
{
- return EROFS_V(inode)->datamode == EROFS_INODE_LAYOUT_INLINE;
+ return EROFS_V(inode)->datamode == EROFS_INODE_FLAT_INLINE;
}
extern const struct super_operations erofs_sops;
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index f3d0d2c03939..ae62293d5a82 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -1642,8 +1642,8 @@ vle_extent_blkaddr(struct inode *inode, pgoff_t index)
struct erofs_sb_info *sbi = EROFS_I_SB(inode);
struct erofs_vnode *vi = EROFS_V(inode);
- unsigned int ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
- vi->xattr_isize) + sizeof(struct erofs_extent_header) +
+ unsigned int ofs = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(vi->inode_isize +
+ vi->xattr_isize) +
index * sizeof(struct z_erofs_vle_decompressed_index);
return erofs_blknr(iloc(sbi, vi->nid) + ofs);
@@ -1655,8 +1655,8 @@ vle_extent_blkoff(struct inode *inode, pgoff_t index)
struct erofs_sb_info *sbi = EROFS_I_SB(inode);
struct erofs_vnode *vi = EROFS_V(inode);
- unsigned int ofs = Z_EROFS_VLE_EXTENT_ALIGN(vi->inode_isize +
- vi->xattr_isize) + sizeof(struct erofs_extent_header) +
+ unsigned int ofs = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(vi->inode_isize +
+ vi->xattr_isize) +
index * sizeof(struct z_erofs_vle_decompressed_index);
return erofs_blkoff(iloc(sbi, vi->nid) + ofs);
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 2/8] staging: erofs: add compacted compression indexes support
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
2019-06-24 7:22 ` [PATCH v3 1/8] staging: erofs: add compacted ondisk compression indexes Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:22 ` [PATCH v3 3/8] staging: erofs: move per-CPU buffers implementation to utils.c Gao Xiang
` (6 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
This patch aims at compacted compression indexes:
1) cleanup z_erofs_map_blocks_iter and move into zmap.c;
2) add compacted 4/2B decoding support.
On kirin980 platform, sequential read is increased about
6% (725MiB/s -> 770MiB/s) on enwik9 dataset if compacted 2B
feature is enabled.
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
change log v3:
- wrap up offset for better readability pointed out by Chao.
drivers/staging/erofs/Makefile | 2 +-
drivers/staging/erofs/inode.c | 7 +-
drivers/staging/erofs/internal.h | 18 +-
drivers/staging/erofs/unzip_vle.c | 286 ------------------
drivers/staging/erofs/zmap.c | 462 ++++++++++++++++++++++++++++++
5 files changed, 480 insertions(+), 295 deletions(-)
create mode 100644 drivers/staging/erofs/zmap.c
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
index a34248a2a16a..84b412c7a991 100644
--- a/drivers/staging/erofs/Makefile
+++ b/drivers/staging/erofs/Makefile
@@ -9,5 +9,5 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
ccflags-y += -I $(srctree)/$(src)/include
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_vle_lz4.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_vle_lz4.o zmap.o
diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
index 3539290b8e45..1d467322bacf 100644
--- a/drivers/staging/erofs/inode.c
+++ b/drivers/staging/erofs/inode.c
@@ -210,12 +210,7 @@ static int fill_inode(struct inode *inode, int isdir)
}
if (is_inode_layout_compression(inode)) {
-#ifdef CONFIG_EROFS_FS_ZIP
- inode->i_mapping->a_ops =
- &z_erofs_vle_normalaccess_aops;
-#else
- err = -ENOTSUPP;
-#endif
+ err = z_erofs_fill_inode(inode);
goto out_unlock;
}
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index c851d0be6cf6..f3063b13c117 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -339,9 +339,11 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
/* atomic flag definitions */
#define EROFS_V_EA_INITED_BIT 0
+#define EROFS_V_Z_INITED_BIT 1
/* bitlock definitions (arranged in reverse order) */
#define EROFS_V_BL_XATTR_BIT (BITS_PER_LONG - 1)
+#define EROFS_V_BL_Z_BIT (BITS_PER_LONG - 2)
struct erofs_vnode {
erofs_nid_t nid;
@@ -356,8 +358,17 @@ struct erofs_vnode {
unsigned xattr_shared_count;
unsigned *xattr_shared_xattrs;
- erofs_blk_t raw_blkaddr;
-
+ union {
+ erofs_blk_t raw_blkaddr;
+#ifdef CONFIG_EROFS_FS_ZIP
+ struct {
+ unsigned short z_advise;
+ unsigned char z_algorithmtype[2];
+ unsigned char z_logical_clusterbits;
+ unsigned char z_physical_clusterbits[2];
+ };
+#endif
+ };
/* the corresponding vfs inode */
struct inode vfs_inode;
};
@@ -447,11 +458,14 @@ struct erofs_map_blocks {
/* Flags used by erofs_map_blocks() */
#define EROFS_GET_BLOCKS_RAW 0x0001
+/* zmap.c */
#ifdef CONFIG_EROFS_FS_ZIP
+int z_erofs_fill_inode(struct inode *inode);
int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
int flags);
#else
+static inline int z_erofs_fill_inode(struct inode *inode) { return -ENOTSUPP; }
static inline int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index ae62293d5a82..8aea938172df 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -1600,289 +1600,3 @@ const struct address_space_operations z_erofs_vle_normalaccess_aops = {
.readpages = z_erofs_vle_normalaccess_readpages,
};
-/*
- * Variable-sized Logical Extent (Fixed Physical Cluster) Compression Mode
- * ---
- * VLE compression mode attempts to compress a number of logical data into
- * a physical cluster with a fixed size.
- * VLE compression mode uses "struct z_erofs_vle_decompressed_index".
- */
-#define __vle_cluster_advise(x, bit, bits) \
- ((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1))
-
-#define __vle_cluster_type(advise) __vle_cluster_advise(advise, \
- Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT, Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS)
-
-#define vle_cluster_type(di) \
- __vle_cluster_type((di)->di_advise)
-
-static int
-vle_decompressed_index_clusterofs(unsigned int *clusterofs,
- unsigned int clustersize,
- struct z_erofs_vle_decompressed_index *di)
-{
- switch (vle_cluster_type(di)) {
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
- *clusterofs = clustersize;
- break;
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
- *clusterofs = le16_to_cpu(di->di_clusterofs);
- break;
- default:
- DBG_BUGON(1);
- return -EIO;
- }
- return 0;
-}
-
-static inline erofs_blk_t
-vle_extent_blkaddr(struct inode *inode, pgoff_t index)
-{
- struct erofs_sb_info *sbi = EROFS_I_SB(inode);
- struct erofs_vnode *vi = EROFS_V(inode);
-
- unsigned int ofs = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(vi->inode_isize +
- vi->xattr_isize) +
- index * sizeof(struct z_erofs_vle_decompressed_index);
-
- return erofs_blknr(iloc(sbi, vi->nid) + ofs);
-}
-
-static inline unsigned int
-vle_extent_blkoff(struct inode *inode, pgoff_t index)
-{
- struct erofs_sb_info *sbi = EROFS_I_SB(inode);
- struct erofs_vnode *vi = EROFS_V(inode);
-
- unsigned int ofs = Z_EROFS_VLE_LEGACY_INDEX_ALIGN(vi->inode_isize +
- vi->xattr_isize) +
- index * sizeof(struct z_erofs_vle_decompressed_index);
-
- return erofs_blkoff(iloc(sbi, vi->nid) + ofs);
-}
-
-struct vle_map_blocks_iter_ctx {
- struct inode *inode;
- struct super_block *sb;
- unsigned int clusterbits;
-
- struct page **mpage_ret;
- void **kaddr_ret;
-};
-
-static int
-vle_get_logical_extent_head(const struct vle_map_blocks_iter_ctx *ctx,
- unsigned int lcn, /* logical cluster number */
- unsigned long long *ofs,
- erofs_blk_t *pblk,
- unsigned int *flags)
-{
- const unsigned int clustersize = 1 << ctx->clusterbits;
- const erofs_blk_t mblk = vle_extent_blkaddr(ctx->inode, lcn);
- struct page *mpage = *ctx->mpage_ret; /* extent metapage */
-
- struct z_erofs_vle_decompressed_index *di;
- unsigned int cluster_type, delta0;
-
- if (mpage->index != mblk) {
- kunmap_atomic(*ctx->kaddr_ret);
- unlock_page(mpage);
- put_page(mpage);
-
- mpage = erofs_get_meta_page(ctx->sb, mblk, false);
- if (IS_ERR(mpage)) {
- *ctx->mpage_ret = NULL;
- return PTR_ERR(mpage);
- }
- *ctx->mpage_ret = mpage;
- *ctx->kaddr_ret = kmap_atomic(mpage);
- }
-
- di = *ctx->kaddr_ret + vle_extent_blkoff(ctx->inode, lcn);
-
- cluster_type = vle_cluster_type(di);
- switch (cluster_type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
- delta0 = le16_to_cpu(di->di_u.delta[0]);
- if (unlikely(!delta0 || delta0 > lcn)) {
- errln("invalid NONHEAD dl0 %u at lcn %u of nid %llu",
- delta0, lcn, EROFS_V(ctx->inode)->nid);
- DBG_BUGON(1);
- return -EIO;
- }
- return vle_get_logical_extent_head(ctx,
- lcn - delta0, ofs, pblk, flags);
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- *flags ^= EROFS_MAP_ZIPPED;
- /* fallthrough */
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
- /* clustersize should be a power of two */
- *ofs = ((u64)lcn << ctx->clusterbits) +
- (le16_to_cpu(di->di_clusterofs) & (clustersize - 1));
- *pblk = le32_to_cpu(di->di_u.blkaddr);
- break;
- default:
- errln("unknown cluster type %u at lcn %u of nid %llu",
- cluster_type, lcn, EROFS_V(ctx->inode)->nid);
- DBG_BUGON(1);
- return -EIO;
- }
- return 0;
-}
-
-int z_erofs_map_blocks_iter(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags)
-{
- void *kaddr;
- const struct vle_map_blocks_iter_ctx ctx = {
- .inode = inode,
- .sb = inode->i_sb,
- .clusterbits = EROFS_I_SB(inode)->clusterbits,
- .mpage_ret = &map->mpage,
- .kaddr_ret = &kaddr
- };
- const unsigned int clustersize = 1 << ctx.clusterbits;
- /* if both m_(l,p)len are 0, regularize l_lblk, l_lofs, etc... */
- const bool initial = !map->m_llen;
-
- /* logicial extent (start, end) offset */
- unsigned long long ofs, end;
- unsigned int lcn;
- u32 ofs_rem;
-
- /* initialize `pblk' to keep gcc from printing foolish warnings */
- erofs_blk_t mblk, pblk = 0;
- struct page *mpage = map->mpage;
- struct z_erofs_vle_decompressed_index *di;
- unsigned int cluster_type, logical_cluster_ofs;
- int err = 0;
-
- trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
-
- /* when trying to read beyond EOF, leave it unmapped */
- if (unlikely(map->m_la >= inode->i_size)) {
- DBG_BUGON(!initial);
- map->m_llen = map->m_la + 1 - inode->i_size;
- map->m_la = inode->i_size;
- map->m_flags = 0;
- goto out;
- }
-
- debugln("%s, m_la %llu m_llen %llu --- start", __func__,
- map->m_la, map->m_llen);
-
- ofs = map->m_la + map->m_llen;
-
- /* clustersize should be power of two */
- lcn = ofs >> ctx.clusterbits;
- ofs_rem = ofs & (clustersize - 1);
-
- mblk = vle_extent_blkaddr(inode, lcn);
-
- if (!mpage || mpage->index != mblk) {
- if (mpage)
- put_page(mpage);
-
- mpage = erofs_get_meta_page(ctx.sb, mblk, false);
- if (IS_ERR(mpage)) {
- err = PTR_ERR(mpage);
- goto out;
- }
- map->mpage = mpage;
- } else {
- lock_page(mpage);
- DBG_BUGON(!PageUptodate(mpage));
- }
-
- kaddr = kmap_atomic(mpage);
- di = kaddr + vle_extent_blkoff(inode, lcn);
-
- debugln("%s, lcn %u mblk %u e_blkoff %u", __func__, lcn,
- mblk, vle_extent_blkoff(inode, lcn));
-
- err = vle_decompressed_index_clusterofs(&logical_cluster_ofs,
- clustersize, di);
- if (unlikely(err))
- goto unmap_out;
-
- if (!initial) {
- /* [walking mode] 'map' has been already initialized */
- map->m_llen += logical_cluster_ofs;
- goto unmap_out;
- }
-
- /* by default, compressed */
- map->m_flags |= EROFS_MAP_ZIPPED;
-
- end = ((u64)lcn + 1) * clustersize;
-
- cluster_type = vle_cluster_type(di);
-
- switch (cluster_type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- if (ofs_rem >= logical_cluster_ofs)
- map->m_flags ^= EROFS_MAP_ZIPPED;
- /* fallthrough */
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
- if (ofs_rem == logical_cluster_ofs) {
- pblk = le32_to_cpu(di->di_u.blkaddr);
- goto exact_hitted;
- }
-
- if (ofs_rem > logical_cluster_ofs) {
- ofs = (u64)lcn * clustersize | logical_cluster_ofs;
- pblk = le32_to_cpu(di->di_u.blkaddr);
- break;
- }
-
- /* logical cluster number should be >= 1 */
- if (unlikely(!lcn)) {
- errln("invalid logical cluster 0 at nid %llu",
- EROFS_V(inode)->nid);
- err = -EIO;
- goto unmap_out;
- }
- end = ((u64)lcn-- * clustersize) | logical_cluster_ofs;
- /* fallthrough */
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
- /* get the correspoinding first chunk */
- err = vle_get_logical_extent_head(&ctx, lcn, &ofs,
- &pblk, &map->m_flags);
- mpage = map->mpage;
-
- if (unlikely(err)) {
- if (mpage)
- goto unmap_out;
- goto out;
- }
- break;
- default:
- errln("unknown cluster type %u at offset %llu of nid %llu",
- cluster_type, ofs, EROFS_V(inode)->nid);
- err = -EIO;
- goto unmap_out;
- }
-
- map->m_la = ofs;
-exact_hitted:
- map->m_llen = end - ofs;
- map->m_plen = clustersize;
- map->m_pa = blknr_to_addr(pblk);
- map->m_flags |= EROFS_MAP_MAPPED;
-unmap_out:
- kunmap_atomic(kaddr);
- unlock_page(mpage);
-out:
- debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
- __func__, map->m_la, map->m_pa,
- map->m_llen, map->m_plen, map->m_flags);
-
- trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
-
- /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
- DBG_BUGON(err < 0 && err != -ENOMEM);
- return err;
-}
-
diff --git a/drivers/staging/erofs/zmap.c b/drivers/staging/erofs/zmap.c
new file mode 100644
index 000000000000..1e75cef11db4
--- /dev/null
+++ b/drivers/staging/erofs/zmap.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/zmap.c
+ *
+ * Copyright (C) 2018-2019 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "internal.h"
+#include <asm/unaligned.h>
+#include <trace/events/erofs.h>
+
+int z_erofs_fill_inode(struct inode *inode)
+{
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ struct super_block *const sb = inode->i_sb;
+
+ if (vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+ vi->z_advise = 0;
+ vi->z_algorithmtype[0] = 0;
+ vi->z_algorithmtype[1] = 0;
+ vi->z_logical_clusterbits = EROFS_SB(sb)->clusterbits;
+ vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
+ vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
+ set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
+ }
+
+ inode->i_mapping->a_ops = &z_erofs_vle_normalaccess_aops;
+ return 0;
+}
+
+static int fill_inode_lazy(struct inode *inode)
+{
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ struct super_block *const sb = inode->i_sb;
+ int err;
+ erofs_off_t pos;
+ struct page *page;
+ void *kaddr;
+ struct z_erofs_map_header *h;
+
+ if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
+ return 0;
+
+ if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_Z_BIT, TASK_KILLABLE))
+ return -ERESTARTSYS;
+
+ err = 0;
+ if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
+ goto out_unlock;
+
+ DBG_BUGON(vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
+
+ pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
+ vi->xattr_isize, 8);
+ page = erofs_get_meta_page(sb, erofs_blknr(pos), false);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out_unlock;
+ }
+
+ kaddr = kmap_atomic(page);
+
+ h = kaddr + erofs_blkoff(pos);
+ vi->z_advise = le16_to_cpu(h->h_advise);
+ vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
+ vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
+
+ if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
+ errln("unknown compression format %u for nid %llu, please upgrade kernel",
+ vi->z_algorithmtype[0], vi->nid);
+ err = -ENOTSUPP;
+ goto unmap_done;
+ }
+
+ vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
+ vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits +
+ ((h->h_clusterbits >> 3) & 3);
+
+ if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) {
+ errln("unsupported physical clusterbits %u for nid %llu, please upgrade kernel",
+ vi->z_physical_clusterbits[0], vi->nid);
+ err = -ENOTSUPP;
+ goto unmap_done;
+ }
+
+ vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
+ ((h->h_clusterbits >> 5) & 7);
+unmap_done:
+ kunmap_atomic(kaddr);
+ unlock_page(page);
+ put_page(page);
+
+ set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
+out_unlock:
+ clear_and_wake_up_bit(EROFS_V_BL_Z_BIT, &vi->flags);
+ return err;
+}
+
+struct z_erofs_maprecorder {
+ struct inode *inode;
+ struct erofs_map_blocks *map;
+ void *kaddr;
+
+ unsigned long lcn;
+ /* compression extent information gathered */
+ u8 type;
+ u16 clusterofs;
+ u16 delta[2];
+ erofs_blk_t pblk;
+};
+
+static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
+ erofs_blk_t eblk)
+{
+ struct super_block *const sb = m->inode->i_sb;
+ struct erofs_map_blocks *const map = m->map;
+ struct page *mpage = map->mpage;
+
+ if (mpage) {
+ if (mpage->index == eblk) {
+ if (!m->kaddr)
+ m->kaddr = kmap_atomic(mpage);
+ return 0;
+ }
+
+ if (m->kaddr) {
+ kunmap_atomic(m->kaddr);
+ m->kaddr = NULL;
+ }
+ put_page(mpage);
+ }
+
+ mpage = erofs_get_meta_page(sb, eblk, false);
+ if (IS_ERR(mpage)) {
+ map->mpage = NULL;
+ return PTR_ERR(mpage);
+ }
+ m->kaddr = kmap_atomic(mpage);
+ unlock_page(mpage);
+ map->mpage = mpage;
+ return 0;
+}
+
+static int vle_legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ unsigned long lcn)
+{
+ struct inode *const inode = m->inode;
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
+ const erofs_off_t pos =
+ Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
+ vi->xattr_isize) +
+ lcn * sizeof(struct z_erofs_vle_decompressed_index);
+ struct z_erofs_vle_decompressed_index *di;
+ unsigned int advise, type;
+ int err;
+
+ err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+ if (err)
+ return err;
+
+ m->lcn = lcn;
+ di = m->kaddr + erofs_blkoff(pos);
+
+ advise = le16_to_cpu(di->di_advise);
+ type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
+ ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+ switch (type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ m->clusterofs = 1 << vi->z_logical_clusterbits;
+ m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
+ m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
+ break;
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ m->clusterofs = le16_to_cpu(di->di_clusterofs);
+ m->pblk = le32_to_cpu(di->di_u.blkaddr);
+ break;
+ default:
+ DBG_BUGON(1);
+ return -EIO;
+ }
+ m->type = type;
+ return 0;
+}
+
+static unsigned int decode_compactedbits(unsigned int lobits,
+ unsigned int lomask,
+ u8 *in, unsigned int pos, u8 *type)
+{
+ const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
+ const unsigned int lo = v & lomask;
+
+ *type = (v >> lobits) & 3;
+ return lo;
+}
+
+static int unpack_compacted_index(struct z_erofs_maprecorder *m,
+ unsigned int amortizedshift,
+ unsigned int eofs)
+{
+ struct erofs_vnode *const vi = EROFS_V(m->inode);
+ const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const unsigned int lomask = (1 << lclusterbits) - 1;
+ unsigned int vcnt, base, lo, encodebits, nblk;
+ int i;
+ u8 *in, type;
+
+ if (1 << amortizedshift == 4)
+ vcnt = 2;
+ else if (1 << amortizedshift == 2 && lclusterbits == 12)
+ vcnt = 16;
+ else
+ return -ENOTSUPP;
+
+ encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
+ base = round_down(eofs, vcnt << amortizedshift);
+ in = m->kaddr + base;
+
+ i = (eofs - base) >> amortizedshift;
+
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * i, &type);
+ m->type = type;
+ if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+ m->clusterofs = 1 << lclusterbits;
+ if (i + 1 != vcnt) {
+ m->delta[0] = lo;
+ return 0;
+ }
+ /*
+ * since the last lcluster in the pack is special,
+ * of which lo saves delta[1] rather than delta[0].
+ * Hence, get delta[0] by the previous lcluster indirectly.
+ */
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * (i - 1), &type);
+ if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ lo = 0;
+ m->delta[0] = lo + 1;
+ return 0;
+ }
+ m->clusterofs = lo;
+ m->delta[0] = 0;
+ /* figout out blkaddr (pblk) for HEAD lclusters */
+ nblk = 1;
+ while (i > 0) {
+ --i;
+ lo = decode_compactedbits(lclusterbits, lomask,
+ in, encodebits * i, &type);
+ if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+ i -= lo;
+
+ if (i >= 0)
+ ++nblk;
+ }
+ in += (vcnt << amortizedshift) - sizeof(__le32);
+ m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
+ return 0;
+}
+
+static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ unsigned long lcn)
+{
+ struct inode *const inode = m->inode;
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
+ vi->inode_isize + vi->xattr_isize, 8) +
+ sizeof(struct z_erofs_map_header);
+ const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+ unsigned int compacted_4b_initial, compacted_2b;
+ unsigned int amortizedshift;
+ erofs_off_t pos;
+ int err;
+
+ if (lclusterbits != 12)
+ return -ENOTSUPP;
+
+ if (lcn >= totalidx)
+ return -EINVAL;
+
+ m->lcn = lcn;
+ /* used to align to 32-byte (compacted_2b) alignment */
+ compacted_4b_initial = (32 - ebase % 32) / 4;
+ if (compacted_4b_initial == 32 / 4)
+ compacted_4b_initial = 0;
+
+ if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B)
+ compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
+ else
+ compacted_2b = 0;
+
+ pos = ebase;
+ if (lcn < compacted_4b_initial) {
+ amortizedshift = 2;
+ goto out;
+ }
+ pos += compacted_4b_initial * 4;
+ lcn -= compacted_4b_initial;
+
+ if (lcn < compacted_2b) {
+ amortizedshift = 1;
+ goto out;
+ }
+ pos += compacted_2b * 2;
+ lcn -= compacted_2b;
+ amortizedshift = 2;
+out:
+ pos += lcn * (1 << amortizedshift);
+ err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+ if (err)
+ return err;
+ return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos));
+}
+
+static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+ unsigned int lcn)
+{
+ const unsigned int datamode = EROFS_V(m->inode)->datamode;
+
+ if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+ return vle_legacy_load_cluster_from_disk(m, lcn);
+
+ if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+ return compacted_load_cluster_from_disk(m, lcn);
+
+ return -EINVAL;
+}
+
+static int vle_extent_lookback(struct z_erofs_maprecorder *m,
+ unsigned int lookback_distance)
+{
+ struct erofs_vnode *const vi = EROFS_V(m->inode);
+ struct erofs_map_blocks *const map = m->map;
+ const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ unsigned long lcn = m->lcn;
+ int err;
+
+ if (lcn < lookback_distance) {
+ DBG_BUGON(1);
+ return -EIO;
+ }
+
+ /* load extent head logical cluster if needed */
+ lcn -= lookback_distance;
+ err = vle_load_cluster_from_disk(m, lcn);
+ if (err)
+ return err;
+
+ switch (m->type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ return vle_extent_lookback(m, m->delta[0]);
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ map->m_flags &= ~EROFS_MAP_ZIPPED;
+ /* fallthrough */
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ map->m_la = (lcn << lclusterbits) | m->clusterofs;
+ break;
+ default:
+ errln("unknown type %u at lcn %lu of nid %llu",
+ m->type, lcn, vi->nid);
+ DBG_BUGON(1);
+ return -EIO;
+ }
+ return 0;
+}
+
+int z_erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
+{
+ struct erofs_vnode *const vi = EROFS_V(inode);
+ struct z_erofs_maprecorder m = {
+ .inode = inode,
+ .map = map,
+ };
+ int err = 0;
+ unsigned int lclusterbits, endoff;
+ unsigned long long ofs, end;
+
+ trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
+
+ /* when trying to read beyond EOF, leave it unmapped */
+ if (unlikely(map->m_la >= inode->i_size)) {
+ map->m_llen = map->m_la + 1 - inode->i_size;
+ map->m_la = inode->i_size;
+ map->m_flags = 0;
+ goto out;
+ }
+
+ err = fill_inode_lazy(inode);
+ if (err)
+ goto out;
+
+ lclusterbits = vi->z_logical_clusterbits;
+ ofs = map->m_la;
+ m.lcn = ofs >> lclusterbits;
+ endoff = ofs & ((1 << lclusterbits) - 1);
+
+ err = vle_load_cluster_from_disk(&m, m.lcn);
+ if (err)
+ goto unmap_out;
+
+ map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */
+ end = (m.lcn + 1ULL) << lclusterbits;
+
+ switch (m.type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ if (endoff >= m.clusterofs)
+ map->m_flags &= ~EROFS_MAP_ZIPPED;
+ /* fallthrough */
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ if (endoff >= m.clusterofs) {
+ map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+ break;
+ }
+ /* m.lcn should be >= 1 if endoff < m.clusterofs */
+ if (unlikely(!m.lcn)) {
+ errln("invalid logical cluster 0 at nid %llu",
+ vi->nid);
+ err = -EIO;
+ goto unmap_out;
+ }
+ end = (m.lcn << lclusterbits) | m.clusterofs;
+ m.delta[0] = 1;
+ /* fallthrough */
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ /* get the correspoinding first chunk */
+ err = vle_extent_lookback(&m, m.delta[0]);
+ if (unlikely(err))
+ goto unmap_out;
+ break;
+ default:
+ errln("unknown type %u at offset %llu of nid %llu",
+ m.type, ofs, vi->nid);
+ err = -EIO;
+ goto unmap_out;
+ }
+
+ map->m_llen = end - map->m_la;
+ map->m_plen = 1 << lclusterbits;
+ map->m_pa = blknr_to_addr(m.pblk);
+ map->m_flags |= EROFS_MAP_MAPPED;
+
+unmap_out:
+ if (m.kaddr)
+ kunmap_atomic(m.kaddr);
+
+out:
+ debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
+ __func__, map->m_la, map->m_pa,
+ map->m_llen, map->m_plen, map->m_flags);
+
+ trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
+
+ /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
+ DBG_BUGON(err < 0 && err != -ENOMEM);
+ return err;
+}
+
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 3/8] staging: erofs: move per-CPU buffers implementation to utils.c
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
2019-06-24 7:22 ` [PATCH v3 1/8] staging: erofs: add compacted ondisk compression indexes Gao Xiang
2019-06-24 7:22 ` [PATCH v3 2/8] staging: erofs: add compacted compression indexes support Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:33 ` Chao Yu
2019-06-24 7:22 ` [PATCH v3 4/8] staging: erofs: move stagingpage operations to compress.h Gao Xiang
` (5 subsequent siblings)
8 siblings, 1 reply; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
This patch moves per-CPU buffers to utils.c in order for
the upcoming generic decompression framework to use it.
Note that I tried to use generic per-CPU buffer or
per-CPU page approaches to clean up further, but obvious
performanace regression (about 2% for sequential read) was
observed.
Therefore let's leave it as it is instead, just move
to utils.c and I'll try to dig into the root cause later.
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
change log v3:
- handle error code if returned by erofs_get_pcpubuf() pointed out by Chao.
drivers/staging/erofs/internal.h | 26 +++++++++++++++++++
drivers/staging/erofs/unzip_vle.c | 5 ++--
drivers/staging/erofs/unzip_vle.h | 4 +--
drivers/staging/erofs/unzip_vle_lz4.c | 37 +++++++++++----------------
drivers/staging/erofs/utils.c | 12 +++++++++
5 files changed, 56 insertions(+), 28 deletions(-)
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index f3063b13c117..dcbe6f7f5dae 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -321,6 +321,16 @@ static inline void z_erofs_exit_zip_subsystem(void) {}
/* page count of a compressed cluster */
#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE)
+#define Z_EROFS_NR_INLINE_PAGEVECS 3
+
+#if (Z_EROFS_CLUSTER_MAX_PAGES > Z_EROFS_NR_INLINE_PAGEVECS)
+#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES
+#else
+#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_NR_INLINE_PAGEVECS
+#endif
+
+#else
+#define EROFS_PCPUBUF_NR_PAGES 0
#endif
typedef u64 erofs_off_t;
@@ -608,6 +618,22 @@ static inline void erofs_vunmap(const void *mem, unsigned int count)
extern struct shrinker erofs_shrinker_info;
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
+
+#if (EROFS_PCPUBUF_NR_PAGES > 0)
+void *erofs_get_pcpubuf(unsigned int pagenr);
+#define erofs_put_pcpubuf(buf) do { \
+ (void)&(buf); \
+ preempt_enable(); \
+} while (0)
+#else
+static inline void *erofs_get_pcpubuf(unsigned int pagenr)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
+
+#define erofs_put_pcpubuf(buf) do {} while (0)
+#endif
+
void erofs_register_super(struct super_block *sb);
void erofs_unregister_super(struct super_block *sb);
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 8aea938172df..08f2d4302ecb 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -552,8 +552,7 @@ static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
if (IS_ERR(work))
return PTR_ERR(work);
got_it:
- z_erofs_pagevec_ctor_init(&builder->vector,
- Z_EROFS_VLE_INLINE_PAGEVECS,
+ z_erofs_pagevec_ctor_init(&builder->vector, Z_EROFS_NR_INLINE_PAGEVECS,
work->pagevec, work->vcnt);
if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
@@ -936,7 +935,7 @@ static int z_erofs_vle_unzip(struct super_block *sb,
for (i = 0; i < nr_pages; ++i)
pages[i] = NULL;
- z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_VLE_INLINE_PAGEVECS,
+ z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
work->pagevec, 0);
for (i = 0; i < work->vcnt; ++i) {
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index 902e67d04029..9c53009700cf 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -44,8 +44,6 @@ static inline bool z_erofs_gather_if_stagingpage(struct list_head *page_pool,
*
*/
-#define Z_EROFS_VLE_INLINE_PAGEVECS 3
-
struct z_erofs_vle_work {
struct mutex lock;
@@ -58,7 +56,7 @@ struct z_erofs_vle_work {
union {
/* L: pagevec */
- erofs_vtptr_t pagevec[Z_EROFS_VLE_INLINE_PAGEVECS];
+ erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
struct rcu_head rcu;
};
};
diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c
index 0daac9b984a8..02e694d9288d 100644
--- a/drivers/staging/erofs/unzip_vle_lz4.c
+++ b/drivers/staging/erofs/unzip_vle_lz4.c
@@ -34,16 +34,6 @@ static int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen)
return -EIO;
}
-#if Z_EROFS_CLUSTER_MAX_PAGES > Z_EROFS_VLE_INLINE_PAGEVECS
-#define EROFS_PERCPU_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES
-#else
-#define EROFS_PERCPU_NR_PAGES Z_EROFS_VLE_INLINE_PAGEVECS
-#endif
-
-static struct {
- char data[PAGE_SIZE * EROFS_PERCPU_NR_PAGES];
-} erofs_pcpubuf[NR_CPUS];
-
int z_erofs_vle_plain_copy(struct page **compressed_pages,
unsigned int clusterpages,
struct page **pages,
@@ -56,8 +46,9 @@ int z_erofs_vle_plain_copy(struct page **compressed_pages,
char *percpu_data;
bool mirrored[Z_EROFS_CLUSTER_MAX_PAGES] = { 0 };
- preempt_disable();
- percpu_data = erofs_pcpubuf[smp_processor_id()].data;
+ percpu_data = erofs_get_pcpubuf(0);
+ if (IS_ERR(percpu_data))
+ return PTR_ERR(percpu_data);
j = 0;
for (i = 0; i < nr_pages; j = i++) {
@@ -117,7 +108,7 @@ int z_erofs_vle_plain_copy(struct page **compressed_pages,
if (src && !mirrored[j])
kunmap_atomic(src);
- preempt_enable();
+ erofs_put_pcpubuf(percpu_data);
return 0;
}
@@ -131,7 +122,7 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
unsigned int nr_pages, i, j;
int ret;
- if (outlen + pageofs > EROFS_PERCPU_NR_PAGES * PAGE_SIZE)
+ if (outlen + pageofs > EROFS_PCPUBUF_NR_PAGES * PAGE_SIZE)
return -ENOTSUPP;
nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE);
@@ -144,8 +135,9 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
return -ENOMEM;
}
- preempt_disable();
- vout = erofs_pcpubuf[smp_processor_id()].data;
+ vout = erofs_get_pcpubuf(0);
+ if (IS_ERR(vout))
+ return PTR_ERR(vout);
ret = z_erofs_unzip_lz4(vin, vout + pageofs,
clusterpages * PAGE_SIZE, outlen);
@@ -174,7 +166,7 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
}
out:
- preempt_enable();
+ erofs_put_pcpubuf(vout);
if (clusterpages == 1)
kunmap_atomic(vin);
@@ -196,8 +188,9 @@ int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
int ret;
if (overlapped) {
- preempt_disable();
- vin = erofs_pcpubuf[smp_processor_id()].data;
+ vin = erofs_get_pcpubuf(0);
+ if (IS_ERR(vin))
+ return PTR_ERR(vin);
for (i = 0; i < clusterpages; ++i) {
void *t = kmap_atomic(compressed_pages[i]);
@@ -216,13 +209,13 @@ int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
if (ret > 0)
ret = 0;
- if (!overlapped) {
+ if (overlapped) {
+ erofs_put_pcpubuf(vin);
+ } else {
if (clusterpages == 1)
kunmap_atomic(vin);
else
erofs_vunmap(vin, clusterpages);
- } else {
- preempt_enable();
}
return ret;
}
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index 3e7d30b6de1d..4bbd3bf34acd 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -27,6 +27,18 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
return page;
}
+#if (EROFS_PCPUBUF_NR_PAGES > 0)
+static struct {
+ u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
+} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
+
+void *erofs_get_pcpubuf(unsigned int pagenr)
+{
+ preempt_disable();
+ return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
+}
+#endif
+
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v3 3/8] staging: erofs: move per-CPU buffers implementation to utils.c
2019-06-24 7:22 ` [PATCH v3 3/8] staging: erofs: move per-CPU buffers implementation to utils.c Gao Xiang
@ 2019-06-24 7:33 ` Chao Yu
0 siblings, 0 replies; 11+ messages in thread
From: Chao Yu @ 2019-06-24 7:33 UTC (permalink / raw)
To: Gao Xiang, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
On 2019/6/24 15:22, Gao Xiang wrote:
> From: Gao Xiang <gaoxiang25@huawei.com>
>
> This patch moves per-CPU buffers to utils.c in order for
> the upcoming generic decompression framework to use it.
>
> Note that I tried to use generic per-CPU buffer or
> per-CPU page approaches to clean up further, but obvious
> performanace regression (about 2% for sequential read) was
> observed.
>
> Therefore let's leave it as it is instead, just move
> to utils.c and I'll try to dig into the root cause later.
>
> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Thanks,
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 4/8] staging: erofs: move stagingpage operations to compress.h
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
` (2 preceding siblings ...)
2019-06-24 7:22 ` [PATCH v3 3/8] staging: erofs: move per-CPU buffers implementation to utils.c Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:22 ` [PATCH v3 5/8] staging: erofs: introduce generic decompression backend Gao Xiang
` (4 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
stagingpages are behaved as bounce pages for temporary use.
Move to compress.h since the upcoming decompressor will
allocate stagingpages as well.
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
drivers/staging/erofs/compress.h | 40 +++++++++++++++++++++++++++++++
drivers/staging/erofs/unzip_vle.c | 11 +++++----
drivers/staging/erofs/unzip_vle.h | 20 ----------------
3 files changed, 46 insertions(+), 25 deletions(-)
create mode 100644 drivers/staging/erofs/compress.h
diff --git a/drivers/staging/erofs/compress.h b/drivers/staging/erofs/compress.h
new file mode 100644
index 000000000000..1dcfc3b35118
--- /dev/null
+++ b/drivers/staging/erofs/compress.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/drivers/staging/erofs/compress.h
+ *
+ * Copyright (C) 2019 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_COMPRESS_H
+#define __EROFS_FS_COMPRESS_H
+
+/*
+ * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
+ * used to mark temporary allocated pages from other
+ * file/cached pages and NULL mapping pages.
+ */
+#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
+
+/* check if a page is marked as staging */
+static inline bool z_erofs_page_is_staging(struct page *page)
+{
+ return page->mapping == Z_EROFS_MAPPING_STAGING;
+}
+
+static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
+ struct page *page)
+{
+ if (!z_erofs_page_is_staging(page))
+ return false;
+
+ /* staging pages should not be used by others at the same time */
+ if (page_ref_count(page) > 1)
+ put_page(page);
+ else
+ list_add(&page->lru, pagepool);
+ return true;
+}
+
+#endif
+
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 08f2d4302ecb..d95f985936b6 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -11,6 +11,7 @@
* distribution for more details.
*/
#include "unzip_vle.h"
+#include "compress.h"
#include <linux/prefetch.h>
#include <trace/events/erofs.h>
@@ -855,7 +856,7 @@ static inline void z_erofs_vle_read_endio(struct bio *bio)
DBG_BUGON(PageUptodate(page));
DBG_BUGON(!page->mapping);
- if (unlikely(!sbi && !z_erofs_is_stagingpage(page))) {
+ if (unlikely(!sbi && !z_erofs_page_is_staging(page))) {
sbi = EROFS_SB(page->mapping->host->i_sb);
if (time_to_inject(sbi, FAULT_READ_IO)) {
@@ -947,7 +948,7 @@ static int z_erofs_vle_unzip(struct super_block *sb,
DBG_BUGON(!page);
DBG_BUGON(!page->mapping);
- if (z_erofs_gather_if_stagingpage(page_pool, page))
+ if (z_erofs_put_stagingpage(page_pool, page))
continue;
if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
@@ -977,7 +978,7 @@ static int z_erofs_vle_unzip(struct super_block *sb,
DBG_BUGON(!page);
DBG_BUGON(!page->mapping);
- if (!z_erofs_is_stagingpage(page)) {
+ if (!z_erofs_page_is_staging(page)) {
if (erofs_page_is_managed(sbi, page)) {
if (unlikely(!PageUptodate(page)))
err = -EIO;
@@ -1055,7 +1056,7 @@ static int z_erofs_vle_unzip(struct super_block *sb,
continue;
/* recycle all individual staging pages */
- (void)z_erofs_gather_if_stagingpage(page_pool, page);
+ (void)z_erofs_put_stagingpage(page_pool, page);
WRITE_ONCE(compressed_pages[i], NULL);
}
@@ -1068,7 +1069,7 @@ static int z_erofs_vle_unzip(struct super_block *sb,
DBG_BUGON(!page->mapping);
/* recycle all individual staging pages */
- if (z_erofs_gather_if_stagingpage(page_pool, page))
+ if (z_erofs_put_stagingpage(page_pool, page))
continue;
if (unlikely(err < 0))
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index 9c53009700cf..6c3e0deb63e7 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -16,26 +16,6 @@
#include "internal.h"
#include "unzip_pagevec.h"
-/*
- * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
- * used for temporary allocated pages (via erofs_allocpage),
- * in order to seperate those from NULL mapping (eg. truncated pages)
- */
-#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
-
-#define z_erofs_is_stagingpage(page) \
- ((page)->mapping == Z_EROFS_MAPPING_STAGING)
-
-static inline bool z_erofs_gather_if_stagingpage(struct list_head *page_pool,
- struct page *page)
-{
- if (z_erofs_is_stagingpage(page)) {
- list_add(&page->lru, page_pool);
- return true;
- }
- return false;
-}
-
/*
* Structure fields follow one of the following exclusion rules.
*
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 5/8] staging: erofs: introduce generic decompression backend
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
` (3 preceding siblings ...)
2019-06-24 7:22 ` [PATCH v3 4/8] staging: erofs: move stagingpage operations to compress.h Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:22 ` [PATCH v3 6/8] staging: erofs: introduce LZ4 decompression inplace Gao Xiang
` (3 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
This patch adds a new generic decompression framework
in order to replace the old LZ4-specific decompression code.
Even though LZ4 is still the only supported algorithm, yet
it is more cleaner and easy to integrate new algorithm than
the old almost hard-coded decompression backend.
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
change log v3:
- move extra 0PADDING logic (used as a decompression inplace enable mark)
to the following patch;
drivers/staging/erofs/Makefile | 2 +-
drivers/staging/erofs/compress.h | 21 ++
drivers/staging/erofs/decompressor.c | 301 +++++++++++++++++++++++++++
3 files changed, 323 insertions(+), 1 deletion(-)
create mode 100644 drivers/staging/erofs/decompressor.c
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
index 84b412c7a991..adeb5d6e2668 100644
--- a/drivers/staging/erofs/Makefile
+++ b/drivers/staging/erofs/Makefile
@@ -9,5 +9,5 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
ccflags-y += -I $(srctree)/$(src)/include
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_vle_lz4.o zmap.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_vle_lz4.o zmap.o decompressor.o
diff --git a/drivers/staging/erofs/compress.h b/drivers/staging/erofs/compress.h
index 1dcfc3b35118..ebeccb1f4eae 100644
--- a/drivers/staging/erofs/compress.h
+++ b/drivers/staging/erofs/compress.h
@@ -9,6 +9,24 @@
#ifndef __EROFS_FS_COMPRESS_H
#define __EROFS_FS_COMPRESS_H
+#include "internal.h"
+
+enum {
+ Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+ Z_EROFS_COMPRESSION_RUNTIME_MAX
+};
+
+struct z_erofs_decompress_req {
+ struct page **in, **out;
+
+ unsigned short pageofs_out;
+ unsigned int inputsize, outputsize;
+
+ /* indicate the algorithm will be used for decompression */
+ unsigned int alg;
+ bool inplace_io, partial_decoding;
+};
+
/*
* - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
* used to mark temporary allocated pages from other
@@ -36,5 +54,8 @@ static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
return true;
}
+int z_erofs_decompress(struct z_erofs_decompress_req *rq,
+ struct list_head *pagepool);
+
#endif
diff --git a/drivers/staging/erofs/decompressor.c b/drivers/staging/erofs/decompressor.c
new file mode 100644
index 000000000000..df8fd68a338b
--- /dev/null
+++ b/drivers/staging/erofs/decompressor.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/staging/erofs/decompressor.c
+ *
+ * Copyright (C) 2019 HUAWEI, Inc.
+ * http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "compress.h"
+#include <linux/lz4.h>
+
+#ifndef LZ4_DISTANCE_MAX /* history window size */
+#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
+#endif
+
+#define LZ4_MAX_DISTANCE_PAGES DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE)
+
+struct z_erofs_decompressor {
+ /*
+ * if destpages have sparsed pages, fill them with bounce pages.
+ * it also check whether destpages indicate continuous physical memory.
+ */
+ int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
+ struct list_head *pagepool);
+ int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out);
+ char *name;
+};
+
+static int lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
+ struct list_head *pagepool)
+{
+ const unsigned int nr =
+ PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
+ unsigned long unused[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
+ BITS_PER_LONG)] = { 0 };
+ void *kaddr = NULL;
+ unsigned int i, j, k;
+
+ for (i = 0; i < nr; ++i) {
+ struct page *const page = rq->out[i];
+
+ j = i & (LZ4_MAX_DISTANCE_PAGES - 1);
+ if (availables[j])
+ __set_bit(j, unused);
+
+ if (page) {
+ if (kaddr) {
+ if (kaddr + PAGE_SIZE == page_address(page))
+ kaddr += PAGE_SIZE;
+ else
+ kaddr = NULL;
+ } else if (!i) {
+ kaddr = page_address(page);
+ }
+ continue;
+ }
+ kaddr = NULL;
+
+ k = find_first_bit(unused, LZ4_MAX_DISTANCE_PAGES);
+ if (k < LZ4_MAX_DISTANCE_PAGES) {
+ j = k;
+ get_page(availables[j]);
+ } else {
+ DBG_BUGON(availables[j]);
+
+ if (!list_empty(pagepool)) {
+ availables[j] = lru_to_page(pagepool);
+ list_del(&availables[j]->lru);
+ DBG_BUGON(page_ref_count(availables[j]) != 1);
+ } else {
+ availables[j] = alloc_pages(GFP_KERNEL, 0);
+ if (!availables[j])
+ return -ENOMEM;
+ }
+ availables[j]->mapping = Z_EROFS_MAPPING_STAGING;
+ }
+ rq->out[i] = availables[j];
+ __clear_bit(j, unused);
+ }
+ return kaddr ? 1 : 0;
+}
+
+static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq,
+ u8 *src, unsigned int pageofs_in)
+{
+ /*
+ * if in-place decompression is ongoing, those decompressed
+ * pages should be copied in order to avoid being overlapped.
+ */
+ struct page **in = rq->in;
+ u8 *const tmp = erofs_get_pcpubuf(0);
+ u8 *tmpp = tmp;
+ unsigned int inlen = rq->inputsize - pageofs_in;
+ unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in);
+
+ while (tmpp < tmp + inlen) {
+ if (!src)
+ src = kmap_atomic(*in);
+ memcpy(tmpp, src + pageofs_in, count);
+ kunmap_atomic(src);
+ src = NULL;
+ tmpp += count;
+ pageofs_in = 0;
+ count = PAGE_SIZE;
+ ++in;
+ }
+ return tmp;
+}
+
+static int lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
+{
+ unsigned int inputmargin, inlen;
+ u8 *src;
+ bool copied;
+ int ret;
+
+ if (rq->inputsize > PAGE_SIZE)
+ return -ENOTSUPP;
+
+ src = kmap_atomic(*rq->in);
+ inputmargin = 0;
+
+ copied = false;
+ inlen = rq->inputsize - inputmargin;
+ if (rq->inplace_io) {
+ src = generic_copy_inplace_data(rq, src, inputmargin);
+ inputmargin = 0;
+ copied = true;
+ }
+
+ ret = LZ4_decompress_safe_partial(src + inputmargin, out,
+ inlen, rq->outputsize,
+ rq->outputsize);
+ if (ret < 0) {
+ errln("%s, failed to decompress, in[%p, %u, %u] out[%p, %u]",
+ __func__, src + inputmargin, inlen, inputmargin,
+ out, rq->outputsize);
+ WARN_ON(1);
+ print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
+ 16, 1, src + inputmargin, inlen, true);
+ print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
+ 16, 1, out, rq->outputsize, true);
+ ret = -EIO;
+ }
+
+ if (copied)
+ erofs_put_pcpubuf(src);
+ else
+ kunmap_atomic(src);
+ return ret;
+}
+
+static struct z_erofs_decompressor decompressors[] = {
+ [Z_EROFS_COMPRESSION_SHIFTED] = {
+ .name = "shifted"
+ },
+ [Z_EROFS_COMPRESSION_LZ4] = {
+ .prepare_destpages = lz4_prepare_destpages,
+ .decompress = lz4_decompress,
+ .name = "lz4"
+ },
+};
+
+static void copy_from_pcpubuf(struct page **out, const char *dst,
+ unsigned short pageofs_out,
+ unsigned int outputsize)
+{
+ const char *end = dst + outputsize;
+ const unsigned int righthalf = PAGE_SIZE - pageofs_out;
+ const char *cur = dst - pageofs_out;
+
+ while (cur < end) {
+ struct page *const page = *out++;
+
+ if (page) {
+ char *buf = kmap_atomic(page);
+
+ if (cur >= dst) {
+ memcpy(buf, cur, min_t(uint, PAGE_SIZE,
+ end - cur));
+ } else {
+ memcpy(buf + pageofs_out, cur + pageofs_out,
+ min_t(uint, righthalf, end - cur));
+ }
+ kunmap_atomic(buf);
+ }
+ cur += PAGE_SIZE;
+ }
+}
+
+static int decompress_generic(struct z_erofs_decompress_req *rq,
+ struct list_head *pagepool)
+{
+ const unsigned int nrpages_out =
+ PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ const struct z_erofs_decompressor *alg = decompressors + rq->alg;
+ unsigned int dst_maptype;
+ void *dst;
+ int ret;
+
+ if (nrpages_out == 1 && !rq->inplace_io) {
+ DBG_BUGON(!*rq->out);
+ dst = kmap_atomic(*rq->out);
+ dst_maptype = 0;
+ goto dstmap_out;
+ }
+
+ /*
+ * For the case of small output size (especially much less
+ * than PAGE_SIZE), memcpy the decompressed data rather than
+ * compressed data is preferred.
+ */
+ if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
+ dst = erofs_get_pcpubuf(0);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ rq->inplace_io = false;
+ ret = alg->decompress(rq, dst);
+ if (!ret)
+ copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
+ rq->outputsize);
+
+ erofs_put_pcpubuf(dst);
+ return ret;
+ }
+
+ ret = alg->prepare_destpages(rq, pagepool);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ dst = page_address(*rq->out);
+ dst_maptype = 1;
+ goto dstmap_out;
+ }
+
+ dst = erofs_vmap(rq->out, nrpages_out);
+ if (!dst)
+ return -ENOMEM;
+ dst_maptype = 2;
+
+dstmap_out:
+ ret = alg->decompress(rq, dst + rq->pageofs_out);
+
+ if (!dst_maptype)
+ kunmap_atomic(dst);
+ else if (dst_maptype == 2)
+ erofs_vunmap(dst, nrpages_out);
+ return ret;
+}
+
+static int shifted_decompress(const struct z_erofs_decompress_req *rq,
+ struct list_head *pagepool)
+{
+ const unsigned int nrpages_out =
+ PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out;
+ unsigned char *src, *dst;
+
+ if (nrpages_out > 2) {
+ DBG_BUGON(1);
+ return -EIO;
+ }
+
+ if (rq->out[0] == *rq->in) {
+ DBG_BUGON(nrpages_out != 1);
+ return 0;
+ }
+
+ src = kmap_atomic(*rq->in);
+ if (!rq->out[0]) {
+ dst = NULL;
+ } else {
+ dst = kmap_atomic(rq->out[0]);
+ memcpy(dst + rq->pageofs_out, src, righthalf);
+ }
+
+ if (rq->out[1] == *rq->in) {
+ memmove(src, src + righthalf, rq->pageofs_out);
+ } else if (nrpages_out == 2) {
+ if (dst)
+ kunmap_atomic(dst);
+ DBG_BUGON(!rq->out[1]);
+ dst = kmap_atomic(rq->out[1]);
+ memcpy(dst, src + righthalf, rq->pageofs_out);
+ }
+ if (dst)
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+ return 0;
+}
+
+int z_erofs_decompress(struct z_erofs_decompress_req *rq,
+ struct list_head *pagepool)
+{
+ if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED)
+ return shifted_decompress(rq, pagepool);
+ return decompress_generic(rq, pagepool);
+}
+
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 6/8] staging: erofs: introduce LZ4 decompression inplace
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
` (4 preceding siblings ...)
2019-06-24 7:22 ` [PATCH v3 5/8] staging: erofs: introduce generic decompression backend Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:22 ` [PATCH v3 7/8] staging: erofs: switch to new decompression backend Gao Xiang
` (2 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
compressed data will be usually loaded into last pages of
the extent (the last page for 4k) for in-place decompression
(more specifically, in-place IO), as ilustration below,
start of compressed logical extent
| end of this logical extent
| |
______v___________________________v________
... | page 6 | page 7 | page 8 | page 9 | ...
|__________|__________|__________|__________|
. ^ . ^
. |compressed|
. | data |
. . .
|< dstsize >|<margin>|
oend iend
op ip
Therefore, it's possible to do decompression inplace (thus no
memcpy at all) if the margin is sufficient and safe enough [1],
and it can be implemented only for fixed-size output compression
compared with fixed-size input compression.
No memcpy for most of in-place IO (about 99% of enwik9) after
decompression inplace is implemented and sequential read will
be improved of course (see the following patches for test results).
[1] https://github.com/lz4/lz4/commit/b17f578a919b7e6b078cede2d52be29dd48c8e8c
https://github.com/lz4/lz4/commit/5997e139f53169fa3a1c1b4418d2452a90b01602
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
drivers/staging/erofs/compress.h | 1 +
drivers/staging/erofs/decompressor.c | 36 ++++++++++++++++++++++++----
drivers/staging/erofs/erofs_fs.h | 3 ++-
3 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/drivers/staging/erofs/compress.h b/drivers/staging/erofs/compress.h
index ebeccb1f4eae..c43aa3374d28 100644
--- a/drivers/staging/erofs/compress.h
+++ b/drivers/staging/erofs/compress.h
@@ -17,6 +17,7 @@ enum {
};
struct z_erofs_decompress_req {
+ struct super_block *sb;
struct page **in, **out;
unsigned short pageofs_out;
diff --git a/drivers/staging/erofs/decompressor.c b/drivers/staging/erofs/decompressor.c
index df8fd68a338b..80f1f39719ba 100644
--- a/drivers/staging/erofs/decompressor.c
+++ b/drivers/staging/erofs/decompressor.c
@@ -14,6 +14,9 @@
#endif
#define LZ4_MAX_DISTANCE_PAGES DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE)
+#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
+#endif
struct z_erofs_decompressor {
/*
@@ -112,7 +115,7 @@ static int lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
{
unsigned int inputmargin, inlen;
u8 *src;
- bool copied;
+ bool copied, support_0padding;
int ret;
if (rq->inputsize > PAGE_SIZE)
@@ -120,13 +123,38 @@ static int lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
src = kmap_atomic(*rq->in);
inputmargin = 0;
+ support_0padding = false;
+
+ /* decompression inplace is only safe when 0padding is enabled */
+ if (EROFS_SB(rq->sb)->requirements & EROFS_REQUIREMENT_LZ4_0PADDING) {
+ support_0padding = true;
+
+ while (!src[inputmargin & ~PAGE_MASK])
+ if (!(++inputmargin & ~PAGE_MASK))
+ break;
+
+ if (inputmargin >= rq->inputsize) {
+ kunmap_atomic(src);
+ return -EIO;
+ }
+ }
copied = false;
inlen = rq->inputsize - inputmargin;
if (rq->inplace_io) {
- src = generic_copy_inplace_data(rq, src, inputmargin);
- inputmargin = 0;
- copied = true;
+ const uint oend = (rq->pageofs_out +
+ rq->outputsize) & ~PAGE_MASK;
+ const uint nr = PAGE_ALIGN(rq->pageofs_out +
+ rq->outputsize) >> PAGE_SHIFT;
+
+ if (rq->partial_decoding || !support_0padding ||
+ rq->out[nr - 1] != rq->in[0] ||
+ rq->inputsize - oend <
+ LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) {
+ src = generic_copy_inplace_data(rq, src, inputmargin);
+ inputmargin = 0;
+ copied = true;
+ }
}
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
diff --git a/drivers/staging/erofs/erofs_fs.h b/drivers/staging/erofs/erofs_fs.h
index 9a9aaf2d9fbb..9f61abb7c1ca 100644
--- a/drivers/staging/erofs/erofs_fs.h
+++ b/drivers/staging/erofs/erofs_fs.h
@@ -21,7 +21,8 @@
* Any bits that aren't in EROFS_ALL_REQUIREMENTS should be
* incompatible with this kernel version.
*/
-#define EROFS_ALL_REQUIREMENTS 0
+#define EROFS_REQUIREMENT_LZ4_0PADDING 0x00000001
+#define EROFS_ALL_REQUIREMENTS EROFS_REQUIREMENT_LZ4_0PADDING
struct erofs_super_block {
/* 0 */__le32 magic; /* in the little endian */
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 7/8] staging: erofs: switch to new decompression backend
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
` (5 preceding siblings ...)
2019-06-24 7:22 ` [PATCH v3 6/8] staging: erofs: introduce LZ4 decompression inplace Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:22 ` [PATCH v3 8/8] staging: erofs: integrate decompression inplace Gao Xiang
2019-06-24 7:34 ` [PATCH v3 0/8] staging: erofs: decompression inplace approach Chao Yu
8 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
This patch integrates new decompression framework to
erofs decompression path, and remove the old
decompression implementation as well.
On kirin980 platform, sequential read is slightly
improved to 778MiB/s after the new decompression
backend is used.
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
drivers/staging/erofs/Makefile | 2 +-
drivers/staging/erofs/internal.h | 6 -
drivers/staging/erofs/unzip_vle.c | 59 +++----
drivers/staging/erofs/unzip_vle.h | 15 +-
drivers/staging/erofs/unzip_vle_lz4.c | 222 --------------------------
5 files changed, 24 insertions(+), 280 deletions(-)
delete mode 100644 drivers/staging/erofs/unzip_vle_lz4.c
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
index adeb5d6e2668..e704d9e51514 100644
--- a/drivers/staging/erofs/Makefile
+++ b/drivers/staging/erofs/Makefile
@@ -9,5 +9,5 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
ccflags-y += -I $(srctree)/$(src)/include
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_vle_lz4.o zmap.o decompressor.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o zmap.o decompressor.o
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index dcbe6f7f5dae..6c8767d4a1d5 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -321,14 +321,8 @@ static inline void z_erofs_exit_zip_subsystem(void) {}
/* page count of a compressed cluster */
#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE)
-#define Z_EROFS_NR_INLINE_PAGEVECS 3
-#if (Z_EROFS_CLUSTER_MAX_PAGES > Z_EROFS_NR_INLINE_PAGEVECS)
#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES
-#else
-#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_NR_INLINE_PAGEVECS
-#endif
-
#else
#define EROFS_PCPUBUF_NR_PAGES 0
#endif
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index d95f985936b6..cb870b83f3c8 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -897,12 +897,12 @@ static int z_erofs_vle_unzip(struct super_block *sb,
unsigned int sparsemem_pages = 0;
struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
struct page **pages, **compressed_pages, *page;
- unsigned int i, llen;
+ unsigned int algorithm;
+ unsigned int i, outputsize;
enum z_erofs_page_type page_type;
bool overlapped;
struct z_erofs_vle_work *work;
- void *vout;
int err;
might_sleep();
@@ -1009,43 +1009,26 @@ static int z_erofs_vle_unzip(struct super_block *sb,
if (unlikely(err))
goto out;
- llen = (nr_pages << PAGE_SHIFT) - work->pageofs;
-
- if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) {
- err = z_erofs_vle_plain_copy(compressed_pages, clusterpages,
- pages, nr_pages, work->pageofs);
- goto out;
- }
-
- if (llen > grp->llen)
- llen = grp->llen;
-
- err = z_erofs_vle_unzip_fast_percpu(compressed_pages, clusterpages,
- pages, llen, work->pageofs);
- if (err != -ENOTSUPP)
- goto out;
-
- if (sparsemem_pages >= nr_pages)
- goto skip_allocpage;
-
- for (i = 0; i < nr_pages; ++i) {
- if (pages[i])
- continue;
-
- pages[i] = __stagingpage_alloc(page_pool, GFP_NOFS);
- }
-
-skip_allocpage:
- vout = erofs_vmap(pages, nr_pages);
- if (!vout) {
- err = -ENOMEM;
- goto out;
- }
-
- err = z_erofs_vle_unzip_vmap(compressed_pages, clusterpages, vout,
- llen, work->pageofs, overlapped);
+ if (nr_pages << PAGE_SHIFT >= work->pageofs + grp->llen)
+ outputsize = grp->llen;
+ else
+ outputsize = (nr_pages << PAGE_SHIFT) - work->pageofs;
- erofs_vunmap(vout, nr_pages);
+ if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN)
+ algorithm = Z_EROFS_COMPRESSION_SHIFTED;
+ else
+ algorithm = Z_EROFS_COMPRESSION_LZ4;
+
+ err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+ .sb = sb,
+ .in = compressed_pages,
+ .out = pages,
+ .pageofs_out = work->pageofs,
+ .inputsize = PAGE_SIZE,
+ .outputsize = outputsize,
+ .alg = algorithm,
+ .inplace_io = overlapped,
+ .partial_decoding = true }, page_pool);
out:
/* must handle all compressed pages before endding pages */
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index 6c3e0deb63e7..a2d9b60beebd 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -16,6 +16,8 @@
#include "internal.h"
#include "unzip_pagevec.h"
+#define Z_EROFS_NR_INLINE_PAGEVECS 3
+
/*
* Structure fields follow one of the following exclusion rules.
*
@@ -189,18 +191,5 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048
-/* unzip_vle_lz4.c */
-int z_erofs_vle_plain_copy(struct page **compressed_pages,
- unsigned int clusterpages, struct page **pages,
- unsigned int nr_pages, unsigned short pageofs);
-int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
- unsigned int clusterpages,
- struct page **pages, unsigned int outlen,
- unsigned short pageofs);
-int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
- unsigned int clusterpages,
- void *vaddr, unsigned int llen,
- unsigned short pageofs, bool overlapped);
-
#endif
diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c
deleted file mode 100644
index 02e694d9288d..000000000000
--- a/drivers/staging/erofs/unzip_vle_lz4.c
+++ /dev/null
@@ -1,222 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/drivers/staging/erofs/unzip_vle_lz4.c
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- * http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of the Linux
- * distribution for more details.
- */
-#include "unzip_vle.h"
-#include <linux/lz4.h>
-
-static int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen)
-{
- int ret = LZ4_decompress_safe_partial(in, out, inlen, outlen, outlen);
-
- if (ret >= 0)
- return ret;
-
- /*
- * LZ4_decompress_safe_partial will return an error code
- * (< 0) if decompression failed
- */
- errln("%s, failed to decompress, in[%p, %zu] outlen[%p, %zu]",
- __func__, in, inlen, out, outlen);
- WARN_ON(1);
- print_hex_dump(KERN_DEBUG, "raw data [in]: ", DUMP_PREFIX_OFFSET,
- 16, 1, in, inlen, true);
- print_hex_dump(KERN_DEBUG, "raw data [out]: ", DUMP_PREFIX_OFFSET,
- 16, 1, out, outlen, true);
- return -EIO;
-}
-
-int z_erofs_vle_plain_copy(struct page **compressed_pages,
- unsigned int clusterpages,
- struct page **pages,
- unsigned int nr_pages,
- unsigned short pageofs)
-{
- unsigned int i, j;
- void *src = NULL;
- const unsigned int righthalf = PAGE_SIZE - pageofs;
- char *percpu_data;
- bool mirrored[Z_EROFS_CLUSTER_MAX_PAGES] = { 0 };
-
- percpu_data = erofs_get_pcpubuf(0);
- if (IS_ERR(percpu_data))
- return PTR_ERR(percpu_data);
-
- j = 0;
- for (i = 0; i < nr_pages; j = i++) {
- struct page *page = pages[i];
- void *dst;
-
- if (!page) {
- if (src) {
- if (!mirrored[j])
- kunmap_atomic(src);
- src = NULL;
- }
- continue;
- }
-
- dst = kmap_atomic(page);
-
- for (; j < clusterpages; ++j) {
- if (compressed_pages[j] != page)
- continue;
-
- DBG_BUGON(mirrored[j]);
- memcpy(percpu_data + j * PAGE_SIZE, dst, PAGE_SIZE);
- mirrored[j] = true;
- break;
- }
-
- if (i) {
- if (!src)
- src = mirrored[i - 1] ?
- percpu_data + (i - 1) * PAGE_SIZE :
- kmap_atomic(compressed_pages[i - 1]);
-
- memcpy(dst, src + righthalf, pageofs);
-
- if (!mirrored[i - 1])
- kunmap_atomic(src);
-
- if (unlikely(i >= clusterpages)) {
- kunmap_atomic(dst);
- break;
- }
- }
-
- if (!righthalf) {
- src = NULL;
- } else {
- src = mirrored[i] ? percpu_data + i * PAGE_SIZE :
- kmap_atomic(compressed_pages[i]);
-
- memcpy(dst + pageofs, src, righthalf);
- }
-
- kunmap_atomic(dst);
- }
-
- if (src && !mirrored[j])
- kunmap_atomic(src);
-
- erofs_put_pcpubuf(percpu_data);
- return 0;
-}
-
-int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
- unsigned int clusterpages,
- struct page **pages,
- unsigned int outlen,
- unsigned short pageofs)
-{
- void *vin, *vout;
- unsigned int nr_pages, i, j;
- int ret;
-
- if (outlen + pageofs > EROFS_PCPUBUF_NR_PAGES * PAGE_SIZE)
- return -ENOTSUPP;
-
- nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE);
-
- if (clusterpages == 1) {
- vin = kmap_atomic(compressed_pages[0]);
- } else {
- vin = erofs_vmap(compressed_pages, clusterpages);
- if (!vin)
- return -ENOMEM;
- }
-
- vout = erofs_get_pcpubuf(0);
- if (IS_ERR(vout))
- return PTR_ERR(vout);
-
- ret = z_erofs_unzip_lz4(vin, vout + pageofs,
- clusterpages * PAGE_SIZE, outlen);
-
- if (ret < 0)
- goto out;
- ret = 0;
-
- for (i = 0; i < nr_pages; ++i) {
- j = min((unsigned int)PAGE_SIZE - pageofs, outlen);
-
- if (pages[i]) {
- if (clusterpages == 1 &&
- pages[i] == compressed_pages[0]) {
- memcpy(vin + pageofs, vout + pageofs, j);
- } else {
- void *dst = kmap_atomic(pages[i]);
-
- memcpy(dst + pageofs, vout + pageofs, j);
- kunmap_atomic(dst);
- }
- }
- vout += PAGE_SIZE;
- outlen -= j;
- pageofs = 0;
- }
-
-out:
- erofs_put_pcpubuf(vout);
-
- if (clusterpages == 1)
- kunmap_atomic(vin);
- else
- erofs_vunmap(vin, clusterpages);
-
- return ret;
-}
-
-int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
- unsigned int clusterpages,
- void *vout,
- unsigned int llen,
- unsigned short pageofs,
- bool overlapped)
-{
- void *vin;
- unsigned int i;
- int ret;
-
- if (overlapped) {
- vin = erofs_get_pcpubuf(0);
- if (IS_ERR(vin))
- return PTR_ERR(vin);
-
- for (i = 0; i < clusterpages; ++i) {
- void *t = kmap_atomic(compressed_pages[i]);
-
- memcpy(vin + PAGE_SIZE * i, t, PAGE_SIZE);
- kunmap_atomic(t);
- }
- } else if (clusterpages == 1) {
- vin = kmap_atomic(compressed_pages[0]);
- } else {
- vin = erofs_vmap(compressed_pages, clusterpages);
- }
-
- ret = z_erofs_unzip_lz4(vin, vout + pageofs,
- clusterpages * PAGE_SIZE, llen);
- if (ret > 0)
- ret = 0;
-
- if (overlapped) {
- erofs_put_pcpubuf(vin);
- } else {
- if (clusterpages == 1)
- kunmap_atomic(vin);
- else
- erofs_vunmap(vin, clusterpages);
- }
- return ret;
-}
-
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 8/8] staging: erofs: integrate decompression inplace
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
` (6 preceding siblings ...)
2019-06-24 7:22 ` [PATCH v3 7/8] staging: erofs: switch to new decompression backend Gao Xiang
@ 2019-06-24 7:22 ` Gao Xiang
2019-06-24 7:34 ` [PATCH v3 0/8] staging: erofs: decompression inplace approach Chao Yu
8 siblings, 0 replies; 11+ messages in thread
From: Gao Xiang @ 2019-06-24 7:22 UTC (permalink / raw)
To: Chao Yu, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei, Gao Xiang
From: Gao Xiang <gaoxiang25@huawei.com>
Decompressor needs to know whether it's a partial
or full decompression since only full decompression
can be decompressed in-place.
On kirin980 platform, sequential read is finally
increased to 812MiB/s after decompression inplace
is enabled.
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
drivers/staging/erofs/internal.h | 3 +++
drivers/staging/erofs/unzip_vle.c | 15 +++++++++++----
drivers/staging/erofs/unzip_vle.h | 1 +
drivers/staging/erofs/zmap.c | 1 +
4 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index 6c8767d4a1d5..963cc1b8b896 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -441,6 +441,7 @@ extern const struct address_space_operations z_erofs_vle_normalaccess_aops;
*/
enum {
BH_Zipped = BH_PrivateStart,
+ BH_FullMapped,
};
/* Has a disk mapping */
@@ -449,6 +450,8 @@ enum {
#define EROFS_MAP_META (1 << BH_Meta)
/* The extent has been compressed */
#define EROFS_MAP_ZIPPED (1 << BH_Zipped)
+/* The length of extent is full */
+#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
struct erofs_map_blocks {
erofs_off_t m_pa, m_la;
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index cb870b83f3c8..316382d33783 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -469,6 +469,9 @@ z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f,
Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
Z_EROFS_VLE_WORKGRP_FMT_PLAIN);
+ if (map->m_flags & EROFS_MAP_FULL_MAPPED)
+ grp->flags |= Z_EROFS_VLE_WORKGRP_FULL_LENGTH;
+
/* new workgrps have been claimed as type 1 */
WRITE_ONCE(grp->next, *f->owned_head);
/* primary and followed work for all new workgrps */
@@ -901,7 +904,7 @@ static int z_erofs_vle_unzip(struct super_block *sb,
unsigned int i, outputsize;
enum z_erofs_page_type page_type;
- bool overlapped;
+ bool overlapped, partial;
struct z_erofs_vle_work *work;
int err;
@@ -1009,10 +1012,13 @@ static int z_erofs_vle_unzip(struct super_block *sb,
if (unlikely(err))
goto out;
- if (nr_pages << PAGE_SHIFT >= work->pageofs + grp->llen)
+ if (nr_pages << PAGE_SHIFT >= work->pageofs + grp->llen) {
outputsize = grp->llen;
- else
+ partial = !(grp->flags & Z_EROFS_VLE_WORKGRP_FULL_LENGTH);
+ } else {
outputsize = (nr_pages << PAGE_SHIFT) - work->pageofs;
+ partial = true;
+ }
if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN)
algorithm = Z_EROFS_COMPRESSION_SHIFTED;
@@ -1028,7 +1034,8 @@ static int z_erofs_vle_unzip(struct super_block *sb,
.outputsize = outputsize,
.alg = algorithm,
.inplace_io = overlapped,
- .partial_decoding = true }, page_pool);
+ .partial_decoding = partial
+ }, page_pool);
out:
/* must handle all compressed pages before endding pages */
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index a2d9b60beebd..ab509d75aefd 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -46,6 +46,7 @@ struct z_erofs_vle_work {
#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN 0
#define Z_EROFS_VLE_WORKGRP_FMT_LZ4 1
#define Z_EROFS_VLE_WORKGRP_FMT_MASK 1
+#define Z_EROFS_VLE_WORKGRP_FULL_LENGTH 2
typedef void *z_erofs_vle_owned_workgrp_t;
diff --git a/drivers/staging/erofs/zmap.c b/drivers/staging/erofs/zmap.c
index 1e75cef11db4..9c0bd65c46bf 100644
--- a/drivers/staging/erofs/zmap.c
+++ b/drivers/staging/erofs/zmap.c
@@ -424,6 +424,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
goto unmap_out;
}
end = (m.lcn << lclusterbits) | m.clusterofs;
+ map->m_flags |= EROFS_MAP_FULL_MAPPED;
m.delta[0] = 1;
/* fallthrough */
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
--
2.17.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v3 0/8] staging: erofs: decompression inplace approach
2019-06-24 7:22 [PATCH v3 0/8] staging: erofs: decompression inplace approach Gao Xiang
` (7 preceding siblings ...)
2019-06-24 7:22 ` [PATCH v3 8/8] staging: erofs: integrate decompression inplace Gao Xiang
@ 2019-06-24 7:34 ` Chao Yu
8 siblings, 0 replies; 11+ messages in thread
From: Chao Yu @ 2019-06-24 7:34 UTC (permalink / raw)
To: Gao Xiang, Greg Kroah-Hartman
Cc: devel, LKML, linux-fsdevel, linux-erofs, Chao Yu, Miao Xie,
Fang Wei, Du Wei
On 2019/6/24 15:22, Gao Xiang wrote:
> This is patch v3 of erofs decompression inplace approach, which is sent
> out by my personal email since I'm out of office to attend Open Source
> Summit China 2019 these days. No major change from PATCH v2 since no
> noticeable issue raised from landing to our products till now, mainly
> as a response to Chao's suggestions.
>
> See the bottom lines which are taken from RFC PATCH v1 and describe
> the principle of these technologies.
>
> The series is based on the latest staging-next since all dependencies
> have already been merged.
>
> changelog from v2:
> - wrap up some offsets as marcos;
> - add some error handling for erofs_get_pcpubuf();
> - move some decompression inplace stuffes from PATCH 5 -> 6.
Thanks for the update, those all changes look good to me. :)
Thanks,
^ permalink raw reply [flat|nested] 11+ messages in thread