* [PATCH v2] f2fs: clean up post-read processing
@ 2020-12-28 23:26 ` Eric Biggers
0 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2020-12-28 23:26 UTC (permalink / raw)
To: linux-f2fs-devel; +Cc: linux-fscrypt
From: Eric Biggers <ebiggers@google.com>
Rework the post-read processing logic to be much easier to understand.
At least one bug is fixed by this: if an I/O error occurred when reading
from disk, decryption and verity would be performed on the uninitialized
data, causing misleading messages in the kernel log.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
v2: rebased onto v5.11-rc1.
fs/f2fs/compress.c | 159 +++++++++++-----
fs/f2fs/data.c | 349 ++++++++++++++----------------------
fs/f2fs/f2fs.h | 31 +++-
include/trace/events/f2fs.h | 4 +-
4 files changed, 271 insertions(+), 272 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 4bcbacfe33259..66888b108f400 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -721,38 +721,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
return ret;
}
-void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
+static void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
{
- struct decompress_io_ctx *dic =
- (struct decompress_io_ctx *)page_private(page);
struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
- struct f2fs_inode_info *fi= F2FS_I(dic->inode);
+ struct f2fs_inode_info *fi = F2FS_I(dic->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
int ret;
int i;
- dec_page_count(sbi, F2FS_RD_DATA);
-
- if (bio->bi_status || PageError(page))
- dic->failed = true;
-
- if (atomic_dec_return(&dic->pending_pages))
- return;
-
- trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
- dic->cluster_size, fi->i_compress_algorithm);
+ trace_f2fs_decompress_cluster_start(dic->inode, dic->cluster_idx,
+ dic->cluster_size,
+ fi->i_compress_algorithm);
- /* submit partial compressed pages */
if (dic->failed) {
ret = -EIO;
- goto out_free_dic;
+ goto out_end_io;
}
dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
if (!dic->tpages) {
ret = -ENOMEM;
- goto out_free_dic;
+ goto out_end_io;
}
for (i = 0; i < dic->cluster_size; i++) {
@@ -764,20 +754,20 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
dic->tpages[i] = f2fs_compress_alloc_page();
if (!dic->tpages[i]) {
ret = -ENOMEM;
- goto out_free_dic;
+ goto out_end_io;
}
}
if (cops->init_decompress_ctx) {
ret = cops->init_decompress_ctx(dic);
if (ret)
- goto out_free_dic;
+ goto out_end_io;
}
dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
if (!dic->rbuf) {
ret = -ENOMEM;
- goto destroy_decompress_ctx;
+ goto out_destroy_decompress_ctx;
}
dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
@@ -816,18 +806,34 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
vm_unmap_ram(dic->cbuf, dic->nr_cpages);
out_vunmap_rbuf:
vm_unmap_ram(dic->rbuf, dic->cluster_size);
-destroy_decompress_ctx:
+out_destroy_decompress_ctx:
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
-out_free_dic:
- if (!verity)
- f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
- ret, false);
-
- trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx,
- dic->clen, ret);
- if (!verity)
- f2fs_free_dic(dic);
+out_end_io:
+ trace_f2fs_decompress_cluster_end(dic->inode, dic->cluster_idx,
+ dic->clen, ret);
+ f2fs_decompress_end_io(dic, ret);
+}
+
+/*
+ * This is called when a page of a compressed cluster has been read from disk
+ * (or failed to be read from disk). It checks whether this page was the last
+ * page being waited on in the cluster, and if so, it decompresses the cluster
+ * (or in the case of a failure, cleans up without actually decompressing).
+ */
+void f2fs_end_read_compressed_page(struct page *page, bool failed)
+{
+ struct decompress_io_ctx *dic =
+ (struct decompress_io_ctx *)page_private(page);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
+
+ dec_page_count(sbi, F2FS_RD_DATA);
+
+ if (failed)
+ WRITE_ONCE(dic->failed, true);
+
+ if (atomic_dec_and_test(&dic->remaining_pages))
+ f2fs_decompress_cluster(dic);
}
static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index)
@@ -1494,6 +1500,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
return err;
}
+static void f2fs_free_dic(struct decompress_io_ctx *dic);
+
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
{
struct decompress_io_ctx *dic;
@@ -1512,12 +1520,14 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
dic->inode = cc->inode;
- atomic_set(&dic->pending_pages, cc->nr_cpages);
+ atomic_set(&dic->remaining_pages, cc->nr_cpages);
dic->cluster_idx = cc->cluster_idx;
dic->cluster_size = cc->cluster_size;
dic->log_cluster_size = cc->log_cluster_size;
dic->nr_cpages = cc->nr_cpages;
+ refcount_set(&dic->refcnt, 1);
dic->failed = false;
+ dic->need_verity = f2fs_need_verity(cc->inode, start_idx);
for (i = 0; i < dic->cluster_size; i++)
dic->rpages[i] = cc->rpages[i];
@@ -1546,7 +1556,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
return ERR_PTR(-ENOMEM);
}
-void f2fs_free_dic(struct decompress_io_ctx *dic)
+static void f2fs_free_dic(struct decompress_io_ctx *dic)
{
int i;
@@ -1574,30 +1584,89 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
kmem_cache_free(dic_entry_slab, dic);
}
-void f2fs_decompress_end_io(struct page **rpages,
- unsigned int cluster_size, bool err, bool verity)
+static void f2fs_put_dic(struct decompress_io_ctx *dic)
+{
+ if (refcount_dec_and_test(&dic->refcnt))
+ f2fs_free_dic(dic);
+}
+
+/*
+ * Update and unlock the cluster's decompressed pagecache pages, and release the
+ * reference to the decompress_io_ctx that was taken for decompression itself.
+ */
+static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
{
int i;
- for (i = 0; i < cluster_size; i++) {
- struct page *rpage = rpages[i];
+ for (i = 0; i < dic->cluster_size; i++) {
+ struct page *rpage = dic->rpages[i];
if (!rpage)
continue;
- if (err || PageError(rpage))
- goto clear_uptodate;
-
- if (!verity || fsverity_verify_page(rpage)) {
+ /* PG_error was set if verity failed. */
+ if (failed || PageError(rpage)) {
+ ClearPageUptodate(rpage);
+ /* will re-read again later */
+ ClearPageError(rpage);
+ } else {
SetPageUptodate(rpage);
- goto unlock;
}
-clear_uptodate:
- ClearPageUptodate(rpage);
- ClearPageError(rpage);
-unlock:
unlock_page(rpage);
}
+
+ f2fs_put_dic(dic);
+}
+
+static void f2fs_verify_cluster(struct work_struct *work)
+{
+ struct decompress_io_ctx *dic =
+ container_of(work, struct decompress_io_ctx, verity_work);
+ int i;
+
+ /* Verify the cluster's decompressed pages with fs-verity. */
+ for (i = 0; i < dic->cluster_size; i++) {
+ struct page *rpage = dic->rpages[i];
+
+ if (rpage && !fsverity_verify_page(rpage))
+ SetPageError(rpage);
+ }
+
+ __f2fs_decompress_end_io(dic, false);
+}
+
+/*
+ * This is called when a compressed cluster has been decompressed
+ * (or failed to be read and/or decompressed).
+ */
+void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
+{
+ if (!failed && dic->need_verity) {
+ /*
+ * Note that to avoid deadlocks, the verity work can't be done
+ * on the decompression workqueue. This is because verifying
+ * the data pages can involve reading metadata pages from the
+ * file, and these metadata pages may be compressed.
+ */
+ INIT_WORK(&dic->verity_work, f2fs_verify_cluster);
+ fsverity_enqueue_verify_work(&dic->verity_work);
+ } else {
+ __f2fs_decompress_end_io(dic, failed);
+ }
+}
+
+/*
+ * Put a reference to the decompression context held by a compressed page in a
+ * bio. We needed this reference in order to keep the compressed pages around
+ * until the bio(s) that contain them have been freed; sometimes that doesn't
+ * happen until after the decompression has finished.
+ */
+void f2fs_put_page_decompress_io_ctx(struct page *page)
+{
+ struct decompress_io_ctx *dic =
+ (struct decompress_io_ctx *)page_private(page);
+
+ f2fs_put_dic(dic);
}
int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index aa34d620bec98..d4e86639707f4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -115,10 +115,21 @@ static enum count_type __read_io_type(struct page *page)
/* postprocessing steps for read bios */
enum bio_post_read_step {
- STEP_DECRYPT,
- STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
- STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
- STEP_VERITY,
+#ifdef CONFIG_FS_ENCRYPTION
+ STEP_DECRYPT = 1 << 0,
+#else
+ STEP_DECRYPT = 0, /* compile out the decryption-related code */
+#endif
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ STEP_DECOMPRESS = 1 << 1,
+#else
+ STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
+#endif
+#ifdef CONFIG_FS_VERITY
+ STEP_VERITY = 1 << 2,
+#else
+ STEP_VERITY = 0, /* compile out the verity-related code */
+#endif
};
struct bio_post_read_ctx {
@@ -128,25 +139,26 @@ struct bio_post_read_ctx {
unsigned int enabled_steps;
};
-static void __read_end_io(struct bio *bio, bool compr, bool verity)
+static void f2fs_finish_read_bio(struct bio *bio)
{
- struct page *page;
struct bio_vec *bv;
struct bvec_iter_all iter_all;
+ /*
+ * Update and unlock the bio's pagecache pages, and put the
+ * decompression context for any compressed pages.
+ */
bio_for_each_segment_all(bv, bio, iter_all) {
- page = bv->bv_page;
+ struct page *page = bv->bv_page;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (compr && f2fs_is_compressed_page(page)) {
- f2fs_decompress_pages(bio, page, verity);
+ if (f2fs_is_compressed_page(page)) {
+ if (bio->bi_status)
+ f2fs_end_read_compressed_page(page, true);
+ f2fs_put_page_decompress_io_ctx(page);
continue;
}
- if (verity)
- continue;
-#endif
- /* PG_error was set if any post_read step failed */
+ /* PG_error was set if decryption or verity failed. */
if (bio->bi_status || PageError(page)) {
ClearPageUptodate(page);
/* will re-read again later */
@@ -157,181 +169,129 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity)
dec_page_count(F2FS_P_SB(page), __read_io_type(page));
unlock_page(page);
}
-}
-
-static void f2fs_release_read_bio(struct bio *bio);
-static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
-{
- if (!compr)
- __read_end_io(bio, false, verity);
- f2fs_release_read_bio(bio);
-}
-
-static void f2fs_decompress_bio(struct bio *bio, bool verity)
-{
- __read_end_io(bio, true, verity);
-}
-
-static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
-
-static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
-{
- fscrypt_decrypt_bio(ctx->bio);
-}
-
-static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
-{
- f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
-}
-
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
-{
- f2fs_decompress_end_io(rpages, cluster_size, false, true);
-}
-
-static void f2fs_verify_bio(struct bio *bio)
-{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
- struct decompress_io_ctx *dic;
-
- dic = (struct decompress_io_ctx *)page_private(page);
-
- if (dic) {
- if (atomic_dec_return(&dic->verity_pages))
- continue;
- f2fs_verify_pages(dic->rpages,
- dic->cluster_size);
- f2fs_free_dic(dic);
- continue;
- }
-
- if (bio->bi_status || PageError(page))
- goto clear_uptodate;
-
- if (fsverity_verify_page(page)) {
- SetPageUptodate(page);
- goto unlock;
- }
-clear_uptodate:
- ClearPageUptodate(page);
- ClearPageError(page);
-unlock:
- dec_page_count(F2FS_P_SB(page), __read_io_type(page));
- unlock_page(page);
- }
+ if (bio->bi_private)
+ mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+ bio_put(bio);
}
-#endif
-static void f2fs_verity_work(struct work_struct *work)
+static void f2fs_verify_bio(struct work_struct *work)
{
struct bio_post_read_ctx *ctx =
container_of(work, struct bio_post_read_ctx, work);
struct bio *bio = ctx->bio;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- unsigned int enabled_steps = ctx->enabled_steps;
-#endif
+ bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
/*
* fsverity_verify_bio() may call readpages() again, and while verity
- * will be disabled for this, decryption may still be needed, resulting
- * in another bio_post_read_ctx being allocated. So to prevent
- * deadlocks we need to release the current ctx to the mempool first.
- * This assumes that verity is the last post-read step.
+ * will be disabled for this, decryption and/or decompression may still
+ * be needed, resulting in another bio_post_read_ctx being allocated.
+ * So to prevent deadlocks we need to release the current ctx to the
+ * mempool first. This assumes that verity is the last post-read step.
*/
mempool_free(ctx, bio_post_read_ctx_pool);
bio->bi_private = NULL;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- /* previous step is decompression */
- if (enabled_steps & (1 << STEP_DECOMPRESS)) {
- f2fs_verify_bio(bio);
- f2fs_release_read_bio(bio);
- return;
+ /*
+ * Verify the bio's pages with fs-verity. Exclude compressed pages,
+ * as those were handled separately by f2fs_end_read_compressed_page().
+ */
+ if (may_have_compressed_pages) {
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ struct page *page = bv->bv_page;
+
+ if (!f2fs_is_compressed_page(page) &&
+ !PageError(page) && !fsverity_verify_page(page))
+ SetPageError(page);
+ }
+ } else {
+ fsverity_verify_bio(bio);
}
-#endif
- fsverity_verify_bio(bio);
- __f2fs_read_end_io(bio, false, false);
+ f2fs_finish_read_bio(bio);
}
-static void f2fs_post_read_work(struct work_struct *work)
+/*
+ * If the bio's data needs to be verified with fs-verity, then enqueue the
+ * verity work for the bio. Otherwise finish the bio now.
+ *
+ * Note that to avoid deadlocks, the verity work can't be done on the
+ * decryption/decompression workqueue. This is because verifying the data pages
+ * can involve reading verity metadata pages from the file, and these verity
+ * metadata pages may be encrypted and/or compressed.
+ */
+static void f2fs_verify_and_finish_bio(struct bio *bio)
{
- struct bio_post_read_ctx *ctx =
- container_of(work, struct bio_post_read_ctx, work);
-
- if (ctx->enabled_steps & (1 << STEP_DECRYPT))
- f2fs_decrypt_work(ctx);
+ struct bio_post_read_ctx *ctx = bio->bi_private;
- if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
- f2fs_decompress_work(ctx);
-
- if (ctx->enabled_steps & (1 << STEP_VERITY)) {
- INIT_WORK(&ctx->work, f2fs_verity_work);
+ if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
+ INIT_WORK(&ctx->work, f2fs_verify_bio);
fsverity_enqueue_verify_work(&ctx->work);
- return;
+ } else {
+ f2fs_finish_read_bio(bio);
}
-
- __f2fs_read_end_io(ctx->bio,
- ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
}
-static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
- struct work_struct *work)
+static void f2fs_post_read_work(struct work_struct *work)
{
- queue_work(sbi->post_read_wq, work);
-}
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+ struct bio *bio = ctx->bio;
-static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
-{
- /*
- * We use different work queues for decryption and for verity because
- * verity may require reading metadata pages that need decryption, and
- * we shouldn't recurse to the same workqueue.
- */
+ if (ctx->enabled_steps & STEP_DECRYPT)
+ fscrypt_decrypt_bio(bio);
- if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
- ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
- INIT_WORK(&ctx->work, f2fs_post_read_work);
- f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
- return;
- }
+ if (ctx->enabled_steps & STEP_DECOMPRESS) {
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+ bool all_compressed = true;
- if (ctx->enabled_steps & (1 << STEP_VERITY)) {
- INIT_WORK(&ctx->work, f2fs_verity_work);
- fsverity_enqueue_verify_work(&ctx->work);
- return;
- }
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ struct page *page = bv->bv_page;
+ /* PG_error will be set if decryption failed. */
+ bool failed = PageError(page);
- __f2fs_read_end_io(ctx->bio, false, false);
-}
+ if (f2fs_is_compressed_page(page))
+ f2fs_end_read_compressed_page(page, failed);
+ else
+ all_compressed = false;
+ }
+ /*
+ * Optimization: if all the bio's pages are compressed, then
+ * scheduling the per-bio verity work is unnecessary, as verity
+ * will be fully handled at the compression cluster level.
+ */
+ if (all_compressed)
+ ctx->enabled_steps &= ~STEP_VERITY;
+ }
-static bool f2fs_bio_post_read_required(struct bio *bio)
-{
- return bio->bi_private;
+ f2fs_verify_and_finish_bio(bio);
}
static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
+ struct bio_post_read_ctx *ctx = bio->bi_private;
if (time_to_inject(sbi, FAULT_READ_IO)) {
f2fs_show_injection_info(sbi, FAULT_READ_IO);
bio->bi_status = BLK_STS_IOERR;
}
- if (f2fs_bio_post_read_required(bio)) {
- struct bio_post_read_ctx *ctx = bio->bi_private;
-
- bio_post_read_processing(ctx);
+ if (bio->bi_status) {
+ f2fs_finish_read_bio(bio);
return;
}
- __f2fs_read_end_io(bio, false, false);
+ if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
+ INIT_WORK(&ctx->work, f2fs_post_read_work);
+ queue_work(ctx->sbi->post_read_wq, &ctx->work);
+ } else {
+ f2fs_verify_and_finish_bio(bio);
+ }
}
static void f2fs_write_end_io(struct bio *bio)
@@ -1022,16 +982,9 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
up_write(&io->io_rwsem);
}
-static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
-{
- return fsverity_active(inode) &&
- idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
-}
-
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned nr_pages, unsigned op_flag,
- pgoff_t first_idx, bool for_write,
- bool for_verity)
+ pgoff_t first_idx, bool for_write)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -1050,13 +1003,19 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (fscrypt_inode_uses_fs_layer_crypto(inode))
- post_read_steps |= 1 << STEP_DECRYPT;
- if (f2fs_compressed_file(inode))
- post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
- if (for_verity && f2fs_need_verity(inode, first_idx))
- post_read_steps |= 1 << STEP_VERITY;
+ post_read_steps |= STEP_DECRYPT;
+
+ if (f2fs_need_verity(inode, first_idx))
+ post_read_steps |= STEP_VERITY;
- if (post_read_steps) {
+ /*
+ * STEP_DECOMPRESS is handled specially, since a compressed file might
+ * contain both compressed and uncompressed clusters. We'll allocate a
+ * bio_post_read_ctx if the file is compressed, but the caller is
+ * responsible for enabling STEP_DECOMPRESS if it's actually needed.
+ */
+
+ if (post_read_steps || f2fs_compressed_file(inode)) {
/* Due to the mempool, this never fails. */
ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
ctx->bio = bio;
@@ -1068,13 +1027,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
return bio;
}
-static void f2fs_release_read_bio(struct bio *bio)
-{
- if (bio->bi_private)
- mempool_free(bio->bi_private, bio_post_read_ctx_pool);
- bio_put(bio);
-}
-
/* This can handle encryption stuffs */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr, int op_flags, bool for_write)
@@ -1083,7 +1035,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
struct bio *bio;
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
- page->index, for_write, true);
+ page->index, for_write);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -2121,7 +2073,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
is_readahead ? REQ_RAHEAD : 0, page->index,
- false, true);
+ false);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
@@ -2167,8 +2119,6 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
sector_t last_block_in_file;
const unsigned blocksize = blks_to_bytes(inode, 1);
struct decompress_io_ctx *dic = NULL;
- struct bio_post_read_ctx *ctx;
- bool for_verity = false;
int i;
int ret = 0;
@@ -2234,29 +2184,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
goto out_put_dnode;
}
- /*
- * It's possible to enable fsverity on the fly when handling a cluster,
- * which requires complicated error handling. Instead of adding more
- * complexity, let's give a rule where end_io post-processes fsverity
- * per cluster. In order to do that, we need to submit bio, if previous
- * bio sets a different post-process policy.
- */
- if (fsverity_active(cc->inode)) {
- atomic_set(&dic->verity_pages, cc->nr_cpages);
- for_verity = true;
-
- if (bio) {
- ctx = bio->bi_private;
- if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
- __submit_bio(sbi, bio, DATA);
- bio = NULL;
- }
- }
- }
-
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page = dic->cpages[i];
block_t blkaddr;
+ struct bio_post_read_ctx *ctx;
blkaddr = data_blkaddr(dn.inode, dn.node_page,
dn.ofs_in_node + i + 1);
@@ -2272,31 +2203,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (!bio) {
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
is_readahead ? REQ_RAHEAD : 0,
- page->index, for_write, for_verity);
+ page->index, for_write);
if (IS_ERR(bio)) {
- unsigned int remained = dic->nr_cpages - i;
- bool release = false;
-
ret = PTR_ERR(bio);
- dic->failed = true;
-
- if (for_verity) {
- if (!atomic_sub_return(remained,
- &dic->verity_pages))
- release = true;
- } else {
- if (!atomic_sub_return(remained,
- &dic->pending_pages))
- release = true;
- }
-
- if (release) {
- f2fs_decompress_end_io(dic->rpages,
- cc->cluster_size, true,
- false);
- f2fs_free_dic(dic);
- }
-
+ f2fs_decompress_end_io(dic, ret);
f2fs_put_dnode(&dn);
*bio_ret = NULL;
return ret;
@@ -2308,10 +2218,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
- /* tag STEP_DECOMPRESS to handle IO in wq */
ctx = bio->bi_private;
- if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
- ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
+ ctx->enabled_steps |= STEP_DECOMPRESS;
+ refcount_inc(&dic->refcnt);
inc_page_count(sbi, F2FS_RD_DATA);
f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
@@ -2328,7 +2237,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
out_put_dnode:
f2fs_put_dnode(&dn);
out:
- f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (cc->rpages[i]) {
+ ClearPageUptodate(cc->rpages[i]);
+ ClearPageError(cc->rpages[i]);
+ unlock_page(cc->rpages[i]);
+ }
+ }
*bio_ret = bio;
return ret;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bb11759191dcc..ed2ce437357c2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1337,7 +1337,7 @@ struct compress_io_ctx {
atomic_t pending_pages; /* in-flight compressed page count */
};
-/* decompress io context for read IO path */
+/* Context for decompressing one cluster on the read IO path */
struct decompress_io_ctx {
u32 magic; /* magic number to indicate page is compressed */
struct inode *inode; /* inode the context belong to */
@@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen; /* valid data length in rbuf */
size_t clen; /* valid data length in cbuf */
- atomic_t pending_pages; /* in-flight compressed page count */
- atomic_t verity_pages; /* in-flight page count for verity */
- bool failed; /* indicate IO error during decompression */
+ atomic_t remaining_pages; /* number of compressed pages remaining to be read */
+ refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
+ bool failed; /* IO error occurred before decompression? */
+ bool need_verity; /* need fs-verity verification after decompression? */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
+ struct work_struct verity_work; /* work to verify the decompressed pages */
};
#define NULL_CLUSTER ((unsigned int)(~0))
@@ -3876,7 +3878,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
bool f2fs_is_compress_backend_ready(struct inode *inode);
int f2fs_init_compress_mempool(void);
void f2fs_destroy_compress_mempool(void);
-void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity);
+void f2fs_end_read_compressed_page(struct page *page, bool failed);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
@@ -3889,9 +3891,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
bool is_readahead, bool for_write);
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
-void f2fs_free_dic(struct decompress_io_ctx *dic);
-void f2fs_decompress_end_io(struct page **rpages,
- unsigned int cluster_size, bool err, bool verity);
+void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
+void f2fs_put_page_decompress_io_ctx(struct page *page);
int f2fs_init_compress_ctx(struct compress_ctx *cc);
void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
@@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
}
static inline int f2fs_init_compress_mempool(void) { return 0; }
static inline void f2fs_destroy_compress_mempool(void) { }
+static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
+{
+ WARN_ON_ONCE(1);
+}
+static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
+{
+ WARN_ON_ONCE(1);
+}
static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
static inline int __init f2fs_init_compress_cache(void) { return 0; }
@@ -4114,6 +4123,12 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
return false;
}
+static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
+{
+ return fsverity_active(inode) &&
+ idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
unsigned int type);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 56b113e3cd6aa..9e2981733ea4a 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
TP_ARGS(inode, cluster_idx, cluster_size, algtype)
);
-DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
+DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
unsigned int cluster_size, unsigned char algtype),
@@ -1810,7 +1810,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end,
TP_ARGS(inode, cluster_idx, compressed_size, ret)
);
-DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
+DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_cluster_end,
TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
unsigned int compressed_size, int ret),
--
2.29.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
@ 2020-12-28 23:26 ` Eric Biggers
0 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2020-12-28 23:26 UTC (permalink / raw)
To: linux-f2fs-devel; +Cc: linux-fscrypt
From: Eric Biggers <ebiggers@google.com>
Rework the post-read processing logic to be much easier to understand.
At least one bug is fixed by this: if an I/O error occurred when reading
from disk, decryption and verity would be performed on the uninitialized
data, causing misleading messages in the kernel log.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
v2: rebased onto v5.11-rc1.
fs/f2fs/compress.c | 159 +++++++++++-----
fs/f2fs/data.c | 349 ++++++++++++++----------------------
fs/f2fs/f2fs.h | 31 +++-
include/trace/events/f2fs.h | 4 +-
4 files changed, 271 insertions(+), 272 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 4bcbacfe33259..66888b108f400 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -721,38 +721,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
return ret;
}
-void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
+static void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
{
- struct decompress_io_ctx *dic =
- (struct decompress_io_ctx *)page_private(page);
struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
- struct f2fs_inode_info *fi= F2FS_I(dic->inode);
+ struct f2fs_inode_info *fi = F2FS_I(dic->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
int ret;
int i;
- dec_page_count(sbi, F2FS_RD_DATA);
-
- if (bio->bi_status || PageError(page))
- dic->failed = true;
-
- if (atomic_dec_return(&dic->pending_pages))
- return;
-
- trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
- dic->cluster_size, fi->i_compress_algorithm);
+ trace_f2fs_decompress_cluster_start(dic->inode, dic->cluster_idx,
+ dic->cluster_size,
+ fi->i_compress_algorithm);
- /* submit partial compressed pages */
if (dic->failed) {
ret = -EIO;
- goto out_free_dic;
+ goto out_end_io;
}
dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
if (!dic->tpages) {
ret = -ENOMEM;
- goto out_free_dic;
+ goto out_end_io;
}
for (i = 0; i < dic->cluster_size; i++) {
@@ -764,20 +754,20 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
dic->tpages[i] = f2fs_compress_alloc_page();
if (!dic->tpages[i]) {
ret = -ENOMEM;
- goto out_free_dic;
+ goto out_end_io;
}
}
if (cops->init_decompress_ctx) {
ret = cops->init_decompress_ctx(dic);
if (ret)
- goto out_free_dic;
+ goto out_end_io;
}
dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
if (!dic->rbuf) {
ret = -ENOMEM;
- goto destroy_decompress_ctx;
+ goto out_destroy_decompress_ctx;
}
dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
@@ -816,18 +806,34 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
vm_unmap_ram(dic->cbuf, dic->nr_cpages);
out_vunmap_rbuf:
vm_unmap_ram(dic->rbuf, dic->cluster_size);
-destroy_decompress_ctx:
+out_destroy_decompress_ctx:
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
-out_free_dic:
- if (!verity)
- f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
- ret, false);
-
- trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx,
- dic->clen, ret);
- if (!verity)
- f2fs_free_dic(dic);
+out_end_io:
+ trace_f2fs_decompress_cluster_end(dic->inode, dic->cluster_idx,
+ dic->clen, ret);
+ f2fs_decompress_end_io(dic, ret);
+}
+
+/*
+ * This is called when a page of a compressed cluster has been read from disk
+ * (or failed to be read from disk). It checks whether this page was the last
+ * page being waited on in the cluster, and if so, it decompresses the cluster
+ * (or in the case of a failure, cleans up without actually decompressing).
+ */
+void f2fs_end_read_compressed_page(struct page *page, bool failed)
+{
+ struct decompress_io_ctx *dic =
+ (struct decompress_io_ctx *)page_private(page);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
+
+ dec_page_count(sbi, F2FS_RD_DATA);
+
+ if (failed)
+ WRITE_ONCE(dic->failed, true);
+
+ if (atomic_dec_and_test(&dic->remaining_pages))
+ f2fs_decompress_cluster(dic);
}
static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index)
@@ -1494,6 +1500,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
return err;
}
+static void f2fs_free_dic(struct decompress_io_ctx *dic);
+
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
{
struct decompress_io_ctx *dic;
@@ -1512,12 +1520,14 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
dic->inode = cc->inode;
- atomic_set(&dic->pending_pages, cc->nr_cpages);
+ atomic_set(&dic->remaining_pages, cc->nr_cpages);
dic->cluster_idx = cc->cluster_idx;
dic->cluster_size = cc->cluster_size;
dic->log_cluster_size = cc->log_cluster_size;
dic->nr_cpages = cc->nr_cpages;
+ refcount_set(&dic->refcnt, 1);
dic->failed = false;
+ dic->need_verity = f2fs_need_verity(cc->inode, start_idx);
for (i = 0; i < dic->cluster_size; i++)
dic->rpages[i] = cc->rpages[i];
@@ -1546,7 +1556,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
return ERR_PTR(-ENOMEM);
}
-void f2fs_free_dic(struct decompress_io_ctx *dic)
+static void f2fs_free_dic(struct decompress_io_ctx *dic)
{
int i;
@@ -1574,30 +1584,89 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
kmem_cache_free(dic_entry_slab, dic);
}
-void f2fs_decompress_end_io(struct page **rpages,
- unsigned int cluster_size, bool err, bool verity)
+static void f2fs_put_dic(struct decompress_io_ctx *dic)
+{
+ if (refcount_dec_and_test(&dic->refcnt))
+ f2fs_free_dic(dic);
+}
+
+/*
+ * Update and unlock the cluster's decompressed pagecache pages, and release the
+ * reference to the decompress_io_ctx that was taken for decompression itself.
+ */
+static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
{
int i;
- for (i = 0; i < cluster_size; i++) {
- struct page *rpage = rpages[i];
+ for (i = 0; i < dic->cluster_size; i++) {
+ struct page *rpage = dic->rpages[i];
if (!rpage)
continue;
- if (err || PageError(rpage))
- goto clear_uptodate;
-
- if (!verity || fsverity_verify_page(rpage)) {
+ /* PG_error was set if verity failed. */
+ if (failed || PageError(rpage)) {
+ ClearPageUptodate(rpage);
+ /* will re-read again later */
+ ClearPageError(rpage);
+ } else {
SetPageUptodate(rpage);
- goto unlock;
}
-clear_uptodate:
- ClearPageUptodate(rpage);
- ClearPageError(rpage);
-unlock:
unlock_page(rpage);
}
+
+ f2fs_put_dic(dic);
+}
+
+static void f2fs_verify_cluster(struct work_struct *work)
+{
+ struct decompress_io_ctx *dic =
+ container_of(work, struct decompress_io_ctx, verity_work);
+ int i;
+
+ /* Verify the cluster's decompressed pages with fs-verity. */
+ for (i = 0; i < dic->cluster_size; i++) {
+ struct page *rpage = dic->rpages[i];
+
+ if (rpage && !fsverity_verify_page(rpage))
+ SetPageError(rpage);
+ }
+
+ __f2fs_decompress_end_io(dic, false);
+}
+
+/*
+ * This is called when a compressed cluster has been decompressed
+ * (or failed to be read and/or decompressed).
+ */
+void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
+{
+ if (!failed && dic->need_verity) {
+ /*
+ * Note that to avoid deadlocks, the verity work can't be done
+ * on the decompression workqueue. This is because verifying
+ * the data pages can involve reading metadata pages from the
+ * file, and these metadata pages may be compressed.
+ */
+ INIT_WORK(&dic->verity_work, f2fs_verify_cluster);
+ fsverity_enqueue_verify_work(&dic->verity_work);
+ } else {
+ __f2fs_decompress_end_io(dic, failed);
+ }
+}
+
+/*
+ * Put a reference to the decompression context held by a compressed page in a
+ * bio. We needed this reference in order to keep the compressed pages around
+ * until the bio(s) that contain them have been freed; sometimes that doesn't
+ * happen until after the decompression has finished.
+ */
+void f2fs_put_page_decompress_io_ctx(struct page *page)
+{
+ struct decompress_io_ctx *dic =
+ (struct decompress_io_ctx *)page_private(page);
+
+ f2fs_put_dic(dic);
}
int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index aa34d620bec98..d4e86639707f4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -115,10 +115,21 @@ static enum count_type __read_io_type(struct page *page)
/* postprocessing steps for read bios */
enum bio_post_read_step {
- STEP_DECRYPT,
- STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
- STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
- STEP_VERITY,
+#ifdef CONFIG_FS_ENCRYPTION
+ STEP_DECRYPT = 1 << 0,
+#else
+ STEP_DECRYPT = 0, /* compile out the decryption-related code */
+#endif
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ STEP_DECOMPRESS = 1 << 1,
+#else
+ STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
+#endif
+#ifdef CONFIG_FS_VERITY
+ STEP_VERITY = 1 << 2,
+#else
+ STEP_VERITY = 0, /* compile out the verity-related code */
+#endif
};
struct bio_post_read_ctx {
@@ -128,25 +139,26 @@ struct bio_post_read_ctx {
unsigned int enabled_steps;
};
-static void __read_end_io(struct bio *bio, bool compr, bool verity)
+static void f2fs_finish_read_bio(struct bio *bio)
{
- struct page *page;
struct bio_vec *bv;
struct bvec_iter_all iter_all;
+ /*
+ * Update and unlock the bio's pagecache pages, and put the
+ * decompression context for any compressed pages.
+ */
bio_for_each_segment_all(bv, bio, iter_all) {
- page = bv->bv_page;
+ struct page *page = bv->bv_page;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (compr && f2fs_is_compressed_page(page)) {
- f2fs_decompress_pages(bio, page, verity);
+ if (f2fs_is_compressed_page(page)) {
+ if (bio->bi_status)
+ f2fs_end_read_compressed_page(page, true);
+ f2fs_put_page_decompress_io_ctx(page);
continue;
}
- if (verity)
- continue;
-#endif
- /* PG_error was set if any post_read step failed */
+ /* PG_error was set if decryption or verity failed. */
if (bio->bi_status || PageError(page)) {
ClearPageUptodate(page);
/* will re-read again later */
@@ -157,181 +169,129 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity)
dec_page_count(F2FS_P_SB(page), __read_io_type(page));
unlock_page(page);
}
-}
-
-static void f2fs_release_read_bio(struct bio *bio);
-static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
-{
- if (!compr)
- __read_end_io(bio, false, verity);
- f2fs_release_read_bio(bio);
-}
-
-static void f2fs_decompress_bio(struct bio *bio, bool verity)
-{
- __read_end_io(bio, true, verity);
-}
-
-static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
-
-static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
-{
- fscrypt_decrypt_bio(ctx->bio);
-}
-
-static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
-{
- f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
-}
-
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
-{
- f2fs_decompress_end_io(rpages, cluster_size, false, true);
-}
-
-static void f2fs_verify_bio(struct bio *bio)
-{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
- struct decompress_io_ctx *dic;
-
- dic = (struct decompress_io_ctx *)page_private(page);
-
- if (dic) {
- if (atomic_dec_return(&dic->verity_pages))
- continue;
- f2fs_verify_pages(dic->rpages,
- dic->cluster_size);
- f2fs_free_dic(dic);
- continue;
- }
-
- if (bio->bi_status || PageError(page))
- goto clear_uptodate;
-
- if (fsverity_verify_page(page)) {
- SetPageUptodate(page);
- goto unlock;
- }
-clear_uptodate:
- ClearPageUptodate(page);
- ClearPageError(page);
-unlock:
- dec_page_count(F2FS_P_SB(page), __read_io_type(page));
- unlock_page(page);
- }
+ if (bio->bi_private)
+ mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+ bio_put(bio);
}
-#endif
-static void f2fs_verity_work(struct work_struct *work)
+static void f2fs_verify_bio(struct work_struct *work)
{
struct bio_post_read_ctx *ctx =
container_of(work, struct bio_post_read_ctx, work);
struct bio *bio = ctx->bio;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- unsigned int enabled_steps = ctx->enabled_steps;
-#endif
+ bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
/*
* fsverity_verify_bio() may call readpages() again, and while verity
- * will be disabled for this, decryption may still be needed, resulting
- * in another bio_post_read_ctx being allocated. So to prevent
- * deadlocks we need to release the current ctx to the mempool first.
- * This assumes that verity is the last post-read step.
+ * will be disabled for this, decryption and/or decompression may still
+ * be needed, resulting in another bio_post_read_ctx being allocated.
+ * So to prevent deadlocks we need to release the current ctx to the
+ * mempool first. This assumes that verity is the last post-read step.
*/
mempool_free(ctx, bio_post_read_ctx_pool);
bio->bi_private = NULL;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- /* previous step is decompression */
- if (enabled_steps & (1 << STEP_DECOMPRESS)) {
- f2fs_verify_bio(bio);
- f2fs_release_read_bio(bio);
- return;
+ /*
+ * Verify the bio's pages with fs-verity. Exclude compressed pages,
+ * as those were handled separately by f2fs_end_read_compressed_page().
+ */
+ if (may_have_compressed_pages) {
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ struct page *page = bv->bv_page;
+
+ if (!f2fs_is_compressed_page(page) &&
+ !PageError(page) && !fsverity_verify_page(page))
+ SetPageError(page);
+ }
+ } else {
+ fsverity_verify_bio(bio);
}
-#endif
- fsverity_verify_bio(bio);
- __f2fs_read_end_io(bio, false, false);
+ f2fs_finish_read_bio(bio);
}
-static void f2fs_post_read_work(struct work_struct *work)
+/*
+ * If the bio's data needs to be verified with fs-verity, then enqueue the
+ * verity work for the bio. Otherwise finish the bio now.
+ *
+ * Note that to avoid deadlocks, the verity work can't be done on the
+ * decryption/decompression workqueue. This is because verifying the data pages
+ * can involve reading verity metadata pages from the file, and these verity
+ * metadata pages may be encrypted and/or compressed.
+ */
+static void f2fs_verify_and_finish_bio(struct bio *bio)
{
- struct bio_post_read_ctx *ctx =
- container_of(work, struct bio_post_read_ctx, work);
-
- if (ctx->enabled_steps & (1 << STEP_DECRYPT))
- f2fs_decrypt_work(ctx);
+ struct bio_post_read_ctx *ctx = bio->bi_private;
- if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
- f2fs_decompress_work(ctx);
-
- if (ctx->enabled_steps & (1 << STEP_VERITY)) {
- INIT_WORK(&ctx->work, f2fs_verity_work);
+ if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
+ INIT_WORK(&ctx->work, f2fs_verify_bio);
fsverity_enqueue_verify_work(&ctx->work);
- return;
+ } else {
+ f2fs_finish_read_bio(bio);
}
-
- __f2fs_read_end_io(ctx->bio,
- ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
}
-static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
- struct work_struct *work)
+static void f2fs_post_read_work(struct work_struct *work)
{
- queue_work(sbi->post_read_wq, work);
-}
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+ struct bio *bio = ctx->bio;
-static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
-{
- /*
- * We use different work queues for decryption and for verity because
- * verity may require reading metadata pages that need decryption, and
- * we shouldn't recurse to the same workqueue.
- */
+ if (ctx->enabled_steps & STEP_DECRYPT)
+ fscrypt_decrypt_bio(bio);
- if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
- ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
- INIT_WORK(&ctx->work, f2fs_post_read_work);
- f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
- return;
- }
+ if (ctx->enabled_steps & STEP_DECOMPRESS) {
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+ bool all_compressed = true;
- if (ctx->enabled_steps & (1 << STEP_VERITY)) {
- INIT_WORK(&ctx->work, f2fs_verity_work);
- fsverity_enqueue_verify_work(&ctx->work);
- return;
- }
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ struct page *page = bv->bv_page;
+ /* PG_error will be set if decryption failed. */
+ bool failed = PageError(page);
- __f2fs_read_end_io(ctx->bio, false, false);
-}
+ if (f2fs_is_compressed_page(page))
+ f2fs_end_read_compressed_page(page, failed);
+ else
+ all_compressed = false;
+ }
+ /*
+ * Optimization: if all the bio's pages are compressed, then
+ * scheduling the per-bio verity work is unnecessary, as verity
+ * will be fully handled at the compression cluster level.
+ */
+ if (all_compressed)
+ ctx->enabled_steps &= ~STEP_VERITY;
+ }
-static bool f2fs_bio_post_read_required(struct bio *bio)
-{
- return bio->bi_private;
+ f2fs_verify_and_finish_bio(bio);
}
static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
+ struct bio_post_read_ctx *ctx = bio->bi_private;
if (time_to_inject(sbi, FAULT_READ_IO)) {
f2fs_show_injection_info(sbi, FAULT_READ_IO);
bio->bi_status = BLK_STS_IOERR;
}
- if (f2fs_bio_post_read_required(bio)) {
- struct bio_post_read_ctx *ctx = bio->bi_private;
-
- bio_post_read_processing(ctx);
+ if (bio->bi_status) {
+ f2fs_finish_read_bio(bio);
return;
}
- __f2fs_read_end_io(bio, false, false);
+ if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
+ INIT_WORK(&ctx->work, f2fs_post_read_work);
+ queue_work(ctx->sbi->post_read_wq, &ctx->work);
+ } else {
+ f2fs_verify_and_finish_bio(bio);
+ }
}
static void f2fs_write_end_io(struct bio *bio)
@@ -1022,16 +982,9 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
up_write(&io->io_rwsem);
}
-static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
-{
- return fsverity_active(inode) &&
- idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
-}
-
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned nr_pages, unsigned op_flag,
- pgoff_t first_idx, bool for_write,
- bool for_verity)
+ pgoff_t first_idx, bool for_write)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -1050,13 +1003,19 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (fscrypt_inode_uses_fs_layer_crypto(inode))
- post_read_steps |= 1 << STEP_DECRYPT;
- if (f2fs_compressed_file(inode))
- post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
- if (for_verity && f2fs_need_verity(inode, first_idx))
- post_read_steps |= 1 << STEP_VERITY;
+ post_read_steps |= STEP_DECRYPT;
+
+ if (f2fs_need_verity(inode, first_idx))
+ post_read_steps |= STEP_VERITY;
- if (post_read_steps) {
+ /*
+ * STEP_DECOMPRESS is handled specially, since a compressed file might
+ * contain both compressed and uncompressed clusters. We'll allocate a
+ * bio_post_read_ctx if the file is compressed, but the caller is
+ * responsible for enabling STEP_DECOMPRESS if it's actually needed.
+ */
+
+ if (post_read_steps || f2fs_compressed_file(inode)) {
/* Due to the mempool, this never fails. */
ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
ctx->bio = bio;
@@ -1068,13 +1027,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
return bio;
}
-static void f2fs_release_read_bio(struct bio *bio)
-{
- if (bio->bi_private)
- mempool_free(bio->bi_private, bio_post_read_ctx_pool);
- bio_put(bio);
-}
-
/* This can handle encryption stuffs */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr, int op_flags, bool for_write)
@@ -1083,7 +1035,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
struct bio *bio;
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
- page->index, for_write, true);
+ page->index, for_write);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -2121,7 +2073,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
is_readahead ? REQ_RAHEAD : 0, page->index,
- false, true);
+ false);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
@@ -2167,8 +2119,6 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
sector_t last_block_in_file;
const unsigned blocksize = blks_to_bytes(inode, 1);
struct decompress_io_ctx *dic = NULL;
- struct bio_post_read_ctx *ctx;
- bool for_verity = false;
int i;
int ret = 0;
@@ -2234,29 +2184,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
goto out_put_dnode;
}
- /*
- * It's possible to enable fsverity on the fly when handling a cluster,
- * which requires complicated error handling. Instead of adding more
- * complexity, let's give a rule where end_io post-processes fsverity
- * per cluster. In order to do that, we need to submit bio, if previous
- * bio sets a different post-process policy.
- */
- if (fsverity_active(cc->inode)) {
- atomic_set(&dic->verity_pages, cc->nr_cpages);
- for_verity = true;
-
- if (bio) {
- ctx = bio->bi_private;
- if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
- __submit_bio(sbi, bio, DATA);
- bio = NULL;
- }
- }
- }
-
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page = dic->cpages[i];
block_t blkaddr;
+ struct bio_post_read_ctx *ctx;
blkaddr = data_blkaddr(dn.inode, dn.node_page,
dn.ofs_in_node + i + 1);
@@ -2272,31 +2203,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (!bio) {
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
is_readahead ? REQ_RAHEAD : 0,
- page->index, for_write, for_verity);
+ page->index, for_write);
if (IS_ERR(bio)) {
- unsigned int remained = dic->nr_cpages - i;
- bool release = false;
-
ret = PTR_ERR(bio);
- dic->failed = true;
-
- if (for_verity) {
- if (!atomic_sub_return(remained,
- &dic->verity_pages))
- release = true;
- } else {
- if (!atomic_sub_return(remained,
- &dic->pending_pages))
- release = true;
- }
-
- if (release) {
- f2fs_decompress_end_io(dic->rpages,
- cc->cluster_size, true,
- false);
- f2fs_free_dic(dic);
- }
-
+ f2fs_decompress_end_io(dic, ret);
f2fs_put_dnode(&dn);
*bio_ret = NULL;
return ret;
@@ -2308,10 +2218,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
- /* tag STEP_DECOMPRESS to handle IO in wq */
ctx = bio->bi_private;
- if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
- ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
+ ctx->enabled_steps |= STEP_DECOMPRESS;
+ refcount_inc(&dic->refcnt);
inc_page_count(sbi, F2FS_RD_DATA);
f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
@@ -2328,7 +2237,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
out_put_dnode:
f2fs_put_dnode(&dn);
out:
- f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (cc->rpages[i]) {
+ ClearPageUptodate(cc->rpages[i]);
+ ClearPageError(cc->rpages[i]);
+ unlock_page(cc->rpages[i]);
+ }
+ }
*bio_ret = bio;
return ret;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bb11759191dcc..ed2ce437357c2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1337,7 +1337,7 @@ struct compress_io_ctx {
atomic_t pending_pages; /* in-flight compressed page count */
};
-/* decompress io context for read IO path */
+/* Context for decompressing one cluster on the read IO path */
struct decompress_io_ctx {
u32 magic; /* magic number to indicate page is compressed */
struct inode *inode; /* inode the context belong to */
@@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen; /* valid data length in rbuf */
size_t clen; /* valid data length in cbuf */
- atomic_t pending_pages; /* in-flight compressed page count */
- atomic_t verity_pages; /* in-flight page count for verity */
- bool failed; /* indicate IO error during decompression */
+ atomic_t remaining_pages; /* number of compressed pages remaining to be read */
+ refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
+ bool failed; /* IO error occurred before decompression? */
+ bool need_verity; /* need fs-verity verification after decompression? */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
+ struct work_struct verity_work; /* work to verify the decompressed pages */
};
#define NULL_CLUSTER ((unsigned int)(~0))
@@ -3876,7 +3878,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
bool f2fs_is_compress_backend_ready(struct inode *inode);
int f2fs_init_compress_mempool(void);
void f2fs_destroy_compress_mempool(void);
-void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity);
+void f2fs_end_read_compressed_page(struct page *page, bool failed);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
@@ -3889,9 +3891,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
bool is_readahead, bool for_write);
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
-void f2fs_free_dic(struct decompress_io_ctx *dic);
-void f2fs_decompress_end_io(struct page **rpages,
- unsigned int cluster_size, bool err, bool verity);
+void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
+void f2fs_put_page_decompress_io_ctx(struct page *page);
int f2fs_init_compress_ctx(struct compress_ctx *cc);
void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
@@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
}
static inline int f2fs_init_compress_mempool(void) { return 0; }
static inline void f2fs_destroy_compress_mempool(void) { }
+static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
+{
+ WARN_ON_ONCE(1);
+}
+static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
+{
+ WARN_ON_ONCE(1);
+}
static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
static inline int __init f2fs_init_compress_cache(void) { return 0; }
@@ -4114,6 +4123,12 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
return false;
}
+static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
+{
+ return fsverity_active(inode) &&
+ idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
unsigned int type);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 56b113e3cd6aa..9e2981733ea4a 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
TP_ARGS(inode, cluster_idx, cluster_size, algtype)
);
-DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
+DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
unsigned int cluster_size, unsigned char algtype),
@@ -1810,7 +1810,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end,
TP_ARGS(inode, cluster_idx, compressed_size, ret)
);
-DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
+DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_cluster_end,
TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
unsigned int compressed_size, int ret),
--
2.29.2
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
2020-12-28 23:26 ` [f2fs-dev] " Eric Biggers
@ 2021-01-04 3:35 ` Chao Yu
-1 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2021-01-04 3:35 UTC (permalink / raw)
To: Eric Biggers, linux-f2fs-devel; +Cc: linux-fscrypt
On 2020/12/29 7:26, Eric Biggers wrote:
> + if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> + INIT_WORK(&ctx->work, f2fs_post_read_work);
> + queue_work(ctx->sbi->post_read_wq, &ctx->work);
Could you keep STEP_DECOMPRESS_NOWQ related logic? so that bio only includes
non-compressed pages could be handled in irq context rather than in wq context
which should be unneeded.
Thanks,
> + } else {
> + f2fs_verify_and_finish_bio(bio);
> + }
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
@ 2021-01-04 3:35 ` Chao Yu
0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2021-01-04 3:35 UTC (permalink / raw)
To: Eric Biggers, linux-f2fs-devel; +Cc: linux-fscrypt
On 2020/12/29 7:26, Eric Biggers wrote:
> + if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> + INIT_WORK(&ctx->work, f2fs_post_read_work);
> + queue_work(ctx->sbi->post_read_wq, &ctx->work);
Could you keep STEP_DECOMPRESS_NOWQ related logic? so that bio only includes
non-compressed pages could be handled in irq context rather than in wq context
which should be unneeded.
Thanks,
> + } else {
> + f2fs_verify_and_finish_bio(bio);
> + }
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
2021-01-04 3:35 ` Chao Yu
@ 2021-01-04 3:45 ` Eric Biggers
-1 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2021-01-04 3:45 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-f2fs-devel, linux-fscrypt
On Mon, Jan 04, 2021 at 11:35:58AM +0800, Chao Yu wrote:
> On 2020/12/29 7:26, Eric Biggers wrote:
> > + if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> > + INIT_WORK(&ctx->work, f2fs_post_read_work);
> > + queue_work(ctx->sbi->post_read_wq, &ctx->work);
>
> Could you keep STEP_DECOMPRESS_NOWQ related logic? so that bio only includes
> non-compressed pages could be handled in irq context rather than in wq context
> which should be unneeded.
>
> Thanks,
>
That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
it's actually needed.
- Eric
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
@ 2021-01-04 3:45 ` Eric Biggers
0 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2021-01-04 3:45 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-fscrypt, linux-f2fs-devel
On Mon, Jan 04, 2021 at 11:35:58AM +0800, Chao Yu wrote:
> On 2020/12/29 7:26, Eric Biggers wrote:
> > + if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> > + INIT_WORK(&ctx->work, f2fs_post_read_work);
> > + queue_work(ctx->sbi->post_read_wq, &ctx->work);
>
> Could you keep STEP_DECOMPRESS_NOWQ related logic? so that bio only includes
> non-compressed pages could be handled in irq context rather than in wq context
> which should be unneeded.
>
> Thanks,
>
That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
it's actually needed.
- Eric
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
2020-12-28 23:26 ` [f2fs-dev] " Eric Biggers
@ 2021-01-04 8:43 ` Chao Yu
-1 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2021-01-04 8:43 UTC (permalink / raw)
To: Eric Biggers, linux-f2fs-devel; +Cc: linux-fscrypt
Hi Eric,
On 2021/1/4 11:45, Eric Biggers wrote:
> That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
> it's actually needed.
Yup, now I see.
Some comments as below.
On 2020/12/29 7:26, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> Rework the post-read processing logic to be much easier to understand.
>
> At least one bug is fixed by this: if an I/O error occurred when reading
> from disk, decryption and verity would be performed on the uninitialized
> data, causing misleading messages in the kernel log.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>
> v2: rebased onto v5.11-rc1.
>
> fs/f2fs/compress.c | 159 +++++++++++-----
> fs/f2fs/data.c | 349 ++++++++++++++----------------------
> fs/f2fs/f2fs.h | 31 +++-
> include/trace/events/f2fs.h | 4 +-
> 4 files changed, 271 insertions(+), 272 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 4bcbacfe33259..66888b108f400 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -721,38 +721,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
> return ret;
> }
>
> -void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
> +static void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
> {
> - struct decompress_io_ctx *dic =
> - (struct decompress_io_ctx *)page_private(page);
> struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
> - struct f2fs_inode_info *fi= F2FS_I(dic->inode);
> + struct f2fs_inode_info *fi = F2FS_I(dic->inode);
> const struct f2fs_compress_ops *cops =
> f2fs_cops[fi->i_compress_algorithm];
> int ret;
> int i;
>
> - dec_page_count(sbi, F2FS_RD_DATA);
> -
> - if (bio->bi_status || PageError(page))
> - dic->failed = true;
> -
> - if (atomic_dec_return(&dic->pending_pages))
> - return;
> -
> - trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
> - dic->cluster_size, fi->i_compress_algorithm);
> + trace_f2fs_decompress_cluster_start(dic->inode, dic->cluster_idx,
> + dic->cluster_size,
> + fi->i_compress_algorithm);
>
> - /* submit partial compressed pages */
> if (dic->failed) {
> ret = -EIO;
> - goto out_free_dic;
> + goto out_end_io;
> }
>
> dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
> if (!dic->tpages) {
> ret = -ENOMEM;
> - goto out_free_dic;
> + goto out_end_io;
> }
>
> for (i = 0; i < dic->cluster_size; i++) {
> @@ -764,20 +754,20 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
> dic->tpages[i] = f2fs_compress_alloc_page();
> if (!dic->tpages[i]) {
> ret = -ENOMEM;
> - goto out_free_dic;
> + goto out_end_io;
> }
> }
>
> if (cops->init_decompress_ctx) {
> ret = cops->init_decompress_ctx(dic);
> if (ret)
> - goto out_free_dic;
> + goto out_end_io;
> }
>
> dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
> if (!dic->rbuf) {
> ret = -ENOMEM;
> - goto destroy_decompress_ctx;
> + goto out_destroy_decompress_ctx;
> }
>
> dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
> @@ -816,18 +806,34 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
> vm_unmap_ram(dic->cbuf, dic->nr_cpages);
> out_vunmap_rbuf:
> vm_unmap_ram(dic->rbuf, dic->cluster_size);
> -destroy_decompress_ctx:
> +out_destroy_decompress_ctx:
> if (cops->destroy_decompress_ctx)
> cops->destroy_decompress_ctx(dic);
> -out_free_dic:
> - if (!verity)
> - f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
> - ret, false);
> -
> - trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx,
> - dic->clen, ret);
> - if (!verity)
> - f2fs_free_dic(dic);
> +out_end_io:
> + trace_f2fs_decompress_cluster_end(dic->inode, dic->cluster_idx,
> + dic->clen, ret);
> + f2fs_decompress_end_io(dic, ret);
> +}
> +
> +/*
> + * This is called when a page of a compressed cluster has been read from disk
> + * (or failed to be read from disk). It checks whether this page was the last
> + * page being waited on in the cluster, and if so, it decompresses the cluster
> + * (or in the case of a failure, cleans up without actually decompressing).
> + */
> +void f2fs_end_read_compressed_page(struct page *page, bool failed)
> +{
> + struct decompress_io_ctx *dic =
> + (struct decompress_io_ctx *)page_private(page);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
> +
> + dec_page_count(sbi, F2FS_RD_DATA);
> +
> + if (failed)
> + WRITE_ONCE(dic->failed, true);
> +
> + if (atomic_dec_and_test(&dic->remaining_pages))
> + f2fs_decompress_cluster(dic);
> }
>
> static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index)
> @@ -1494,6 +1500,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
> return err;
> }
>
> +static void f2fs_free_dic(struct decompress_io_ctx *dic);
> +
> struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
> {
> struct decompress_io_ctx *dic;
> @@ -1512,12 +1520,14 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
>
> dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
> dic->inode = cc->inode;
> - atomic_set(&dic->pending_pages, cc->nr_cpages);
> + atomic_set(&dic->remaining_pages, cc->nr_cpages);
> dic->cluster_idx = cc->cluster_idx;
> dic->cluster_size = cc->cluster_size;
> dic->log_cluster_size = cc->log_cluster_size;
> dic->nr_cpages = cc->nr_cpages;
> + refcount_set(&dic->refcnt, 1);
> dic->failed = false;
> + dic->need_verity = f2fs_need_verity(cc->inode, start_idx);
>
> for (i = 0; i < dic->cluster_size; i++)
> dic->rpages[i] = cc->rpages[i];
> @@ -1546,7 +1556,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
> return ERR_PTR(-ENOMEM);
> }
>
> -void f2fs_free_dic(struct decompress_io_ctx *dic)
> +static void f2fs_free_dic(struct decompress_io_ctx *dic)
> {
> int i;
>
> @@ -1574,30 +1584,89 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
> kmem_cache_free(dic_entry_slab, dic);
> }
>
> -void f2fs_decompress_end_io(struct page **rpages,
> - unsigned int cluster_size, bool err, bool verity)
> +static void f2fs_put_dic(struct decompress_io_ctx *dic)
> +{
> + if (refcount_dec_and_test(&dic->refcnt))
> + f2fs_free_dic(dic);
> +}
> +
> +/*
> + * Update and unlock the cluster's decompressed pagecache pages, and release the
> + * reference to the decompress_io_ctx that was taken for decompression itself.
> + */
> +static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
> {
> int i;
>
> - for (i = 0; i < cluster_size; i++) {
> - struct page *rpage = rpages[i];
> + for (i = 0; i < dic->cluster_size; i++) {
> + struct page *rpage = dic->rpages[i];
>
> if (!rpage)
> continue;
>
> - if (err || PageError(rpage))
> - goto clear_uptodate;
> -
> - if (!verity || fsverity_verify_page(rpage)) {
> + /* PG_error was set if verity failed. */
> + if (failed || PageError(rpage)) {
> + ClearPageUptodate(rpage);
> + /* will re-read again later */
> + ClearPageError(rpage);
> + } else {
> SetPageUptodate(rpage);
> - goto unlock;
> }
> -clear_uptodate:
> - ClearPageUptodate(rpage);
> - ClearPageError(rpage);
> -unlock:
> unlock_page(rpage);
> }
> +
> + f2fs_put_dic(dic);
> +}
> +
> +static void f2fs_verify_cluster(struct work_struct *work)
> +{
> + struct decompress_io_ctx *dic =
> + container_of(work, struct decompress_io_ctx, verity_work);
> + int i;
> +
> + /* Verify the cluster's decompressed pages with fs-verity. */
> + for (i = 0; i < dic->cluster_size; i++) {
> + struct page *rpage = dic->rpages[i];
> +
> + if (rpage && !fsverity_verify_page(rpage))
> + SetPageError(rpage);
> + }
> +
> + __f2fs_decompress_end_io(dic, false);
> +}
> +
> +/*
> + * This is called when a compressed cluster has been decompressed
> + * (or failed to be read and/or decompressed).
> + */
> +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
> +{
> + if (!failed && dic->need_verity) {
> + /*
> + * Note that to avoid deadlocks, the verity work can't be done
> + * on the decompression workqueue. This is because verifying
> + * the data pages can involve reading metadata pages from the
> + * file, and these metadata pages may be compressed.
> + */
> + INIT_WORK(&dic->verity_work, f2fs_verify_cluster);
> + fsverity_enqueue_verify_work(&dic->verity_work);
> + } else {
> + __f2fs_decompress_end_io(dic, failed);
> + }
> +}
> +
> +/*
> + * Put a reference to the decompression context held by a compressed page in a
> + * bio. We needed this reference in order to keep the compressed pages around
> + * until the bio(s) that contain them have been freed; sometimes that doesn't
> + * happen until after the decompression has finished.
> + */
> +void f2fs_put_page_decompress_io_ctx(struct page *page)
> +{
> + struct decompress_io_ctx *dic =
> + (struct decompress_io_ctx *)page_private(page);
> +
> + f2fs_put_dic(dic);
> }
>
> int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index aa34d620bec98..d4e86639707f4 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -115,10 +115,21 @@ static enum count_type __read_io_type(struct page *page)
>
> /* postprocessing steps for read bios */
> enum bio_post_read_step {
> - STEP_DECRYPT,
> - STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
> - STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
> - STEP_VERITY,
> +#ifdef CONFIG_FS_ENCRYPTION
> + STEP_DECRYPT = 1 << 0,
> +#else
> + STEP_DECRYPT = 0, /* compile out the decryption-related code */
> +#endif
> +#ifdef CONFIG_F2FS_FS_COMPRESSION
> + STEP_DECOMPRESS = 1 << 1,
> +#else
> + STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
> +#endif
> +#ifdef CONFIG_FS_VERITY
> + STEP_VERITY = 1 << 2,
> +#else
> + STEP_VERITY = 0, /* compile out the verity-related code */
> +#endif
> };
>
> struct bio_post_read_ctx {
> @@ -128,25 +139,26 @@ struct bio_post_read_ctx {
> unsigned int enabled_steps;
> };
>
> -static void __read_end_io(struct bio *bio, bool compr, bool verity)
> +static void f2fs_finish_read_bio(struct bio *bio)
> {
> - struct page *page;
> struct bio_vec *bv;
> struct bvec_iter_all iter_all;
>
> + /*
> + * Update and unlock the bio's pagecache pages, and put the
> + * decompression context for any compressed pages.
> + */
> bio_for_each_segment_all(bv, bio, iter_all) {
> - page = bv->bv_page;
> + struct page *page = bv->bv_page;
>
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> - if (compr && f2fs_is_compressed_page(page)) {
> - f2fs_decompress_pages(bio, page, verity);
> + if (f2fs_is_compressed_page(page)) {
> + if (bio->bi_status)
> + f2fs_end_read_compressed_page(page, true);
> + f2fs_put_page_decompress_io_ctx(page);
> continue;
> }
> - if (verity)
> - continue;
> -#endif
>
> - /* PG_error was set if any post_read step failed */
> + /* PG_error was set if decryption or verity failed. */
> if (bio->bi_status || PageError(page)) {
> ClearPageUptodate(page);
> /* will re-read again later */
> @@ -157,181 +169,129 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity)
> dec_page_count(F2FS_P_SB(page), __read_io_type(page));
> unlock_page(page);
> }
> -}
> -
> -static void f2fs_release_read_bio(struct bio *bio);
> -static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
> -{
> - if (!compr)
> - __read_end_io(bio, false, verity);
> - f2fs_release_read_bio(bio);
> -}
> -
> -static void f2fs_decompress_bio(struct bio *bio, bool verity)
> -{
> - __read_end_io(bio, true, verity);
> -}
> -
> -static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
> -
> -static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
> -{
> - fscrypt_decrypt_bio(ctx->bio);
> -}
> -
> -static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
> -{
> - f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
> -}
> -
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> -static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
> -{
> - f2fs_decompress_end_io(rpages, cluster_size, false, true);
> -}
> -
> -static void f2fs_verify_bio(struct bio *bio)
> -{
> - struct bio_vec *bv;
> - struct bvec_iter_all iter_all;
>
> - bio_for_each_segment_all(bv, bio, iter_all) {
> - struct page *page = bv->bv_page;
> - struct decompress_io_ctx *dic;
> -
> - dic = (struct decompress_io_ctx *)page_private(page);
> -
> - if (dic) {
> - if (atomic_dec_return(&dic->verity_pages))
> - continue;
> - f2fs_verify_pages(dic->rpages,
> - dic->cluster_size);
> - f2fs_free_dic(dic);
> - continue;
> - }
> -
> - if (bio->bi_status || PageError(page))
> - goto clear_uptodate;
> -
> - if (fsverity_verify_page(page)) {
> - SetPageUptodate(page);
> - goto unlock;
> - }
> -clear_uptodate:
> - ClearPageUptodate(page);
> - ClearPageError(page);
> -unlock:
> - dec_page_count(F2FS_P_SB(page), __read_io_type(page));
> - unlock_page(page);
> - }
> + if (bio->bi_private)
> + mempool_free(bio->bi_private, bio_post_read_ctx_pool);
> + bio_put(bio);
> }
> -#endif
>
> -static void f2fs_verity_work(struct work_struct *work)
> +static void f2fs_verify_bio(struct work_struct *work)
> {
> struct bio_post_read_ctx *ctx =
> container_of(work, struct bio_post_read_ctx, work);
> struct bio *bio = ctx->bio;
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> - unsigned int enabled_steps = ctx->enabled_steps;
> -#endif
> + bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
>
> /*
> * fsverity_verify_bio() may call readpages() again, and while verity
> - * will be disabled for this, decryption may still be needed, resulting
> - * in another bio_post_read_ctx being allocated. So to prevent
> - * deadlocks we need to release the current ctx to the mempool first.
> - * This assumes that verity is the last post-read step.
> + * will be disabled for this, decryption and/or decompression may still
> + * be needed, resulting in another bio_post_read_ctx being allocated.
> + * So to prevent deadlocks we need to release the current ctx to the
> + * mempool first. This assumes that verity is the last post-read step.
> */
> mempool_free(ctx, bio_post_read_ctx_pool);
> bio->bi_private = NULL;
>
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> - /* previous step is decompression */
> - if (enabled_steps & (1 << STEP_DECOMPRESS)) {
> - f2fs_verify_bio(bio);
> - f2fs_release_read_bio(bio);
> - return;
> + /*
> + * Verify the bio's pages with fs-verity. Exclude compressed pages,
> + * as those were handled separately by f2fs_end_read_compressed_page().
> + */
> + if (may_have_compressed_pages) {
> + struct bio_vec *bv;
> + struct bvec_iter_all iter_all;
> +
> + bio_for_each_segment_all(bv, bio, iter_all) {
> + struct page *page = bv->bv_page;
> +
> + if (!f2fs_is_compressed_page(page) &&
> + !PageError(page) && !fsverity_verify_page(page))
> + SetPageError(page);
> + }
> + } else {
> + fsverity_verify_bio(bio);
> }
> -#endif
>
> - fsverity_verify_bio(bio);
> - __f2fs_read_end_io(bio, false, false);
> + f2fs_finish_read_bio(bio);
> }
>
> -static void f2fs_post_read_work(struct work_struct *work)
> +/*
> + * If the bio's data needs to be verified with fs-verity, then enqueue the
> + * verity work for the bio. Otherwise finish the bio now.
> + *
> + * Note that to avoid deadlocks, the verity work can't be done on the
> + * decryption/decompression workqueue. This is because verifying the data pages
> + * can involve reading verity metadata pages from the file, and these verity
> + * metadata pages may be encrypted and/or compressed.
> + */
> +static void f2fs_verify_and_finish_bio(struct bio *bio)
> {
> - struct bio_post_read_ctx *ctx =
> - container_of(work, struct bio_post_read_ctx, work);
> -
> - if (ctx->enabled_steps & (1 << STEP_DECRYPT))
> - f2fs_decrypt_work(ctx);
> + struct bio_post_read_ctx *ctx = bio->bi_private;
>
> - if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
> - f2fs_decompress_work(ctx);
> -
> - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
> - INIT_WORK(&ctx->work, f2fs_verity_work);
> + if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
> + INIT_WORK(&ctx->work, f2fs_verify_bio);
> fsverity_enqueue_verify_work(&ctx->work);
> - return;
> + } else {
> + f2fs_finish_read_bio(bio);
> }
> -
> - __f2fs_read_end_io(ctx->bio,
> - ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
> }
>
> -static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
> - struct work_struct *work)
> +static void f2fs_post_read_work(struct work_struct *work)
> {
> - queue_work(sbi->post_read_wq, work);
> -}
> + struct bio_post_read_ctx *ctx =
> + container_of(work, struct bio_post_read_ctx, work);
> + struct bio *bio = ctx->bio;
>
> -static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
> -{
> - /*
> - * We use different work queues for decryption and for verity because
> - * verity may require reading metadata pages that need decryption, and
> - * we shouldn't recurse to the same workqueue.
> - */
> + if (ctx->enabled_steps & STEP_DECRYPT)
> + fscrypt_decrypt_bio(bio);
>
> - if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
> - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
> - INIT_WORK(&ctx->work, f2fs_post_read_work);
> - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
> - return;
> - }
> + if (ctx->enabled_steps & STEP_DECOMPRESS) {
> + struct bio_vec *bv;
> + struct bvec_iter_all iter_all;
> + bool all_compressed = true;
>
> - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
> - INIT_WORK(&ctx->work, f2fs_verity_work);
> - fsverity_enqueue_verify_work(&ctx->work);
> - return;
> - }
> + bio_for_each_segment_all(bv, bio, iter_all) {
> + struct page *page = bv->bv_page;
> + /* PG_error will be set if decryption failed. */
> + bool failed = PageError(page);
>
> - __f2fs_read_end_io(ctx->bio, false, false);
> -}
> + if (f2fs_is_compressed_page(page))
> + f2fs_end_read_compressed_page(page, failed);
> + else
> + all_compressed = false;
> + }
> + /*
> + * Optimization: if all the bio's pages are compressed, then
> + * scheduling the per-bio verity work is unnecessary, as verity
> + * will be fully handled at the compression cluster level.
> + */
> + if (all_compressed)
> + ctx->enabled_steps &= ~STEP_VERITY;
> + }
Can we wrap above logic into a function for cleanup?
>
> -static bool f2fs_bio_post_read_required(struct bio *bio)
> -{
> - return bio->bi_private;
> + f2fs_verify_and_finish_bio(bio);
> }
>
> static void f2fs_read_end_io(struct bio *bio)
> {
> struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
> + struct bio_post_read_ctx *ctx = bio->bi_private;
>
> if (time_to_inject(sbi, FAULT_READ_IO)) {
> f2fs_show_injection_info(sbi, FAULT_READ_IO);
> bio->bi_status = BLK_STS_IOERR;
> }
>
> - if (f2fs_bio_post_read_required(bio)) {
> - struct bio_post_read_ctx *ctx = bio->bi_private;
> -
> - bio_post_read_processing(ctx);
> + if (bio->bi_status) {
> + f2fs_finish_read_bio(bio);
> return;
> }
>
> - __f2fs_read_end_io(bio, false, false);
> + if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> + INIT_WORK(&ctx->work, f2fs_post_read_work);
> + queue_work(ctx->sbi->post_read_wq, &ctx->work);
> + } else {
> + f2fs_verify_and_finish_bio(bio);
> + }
> }
>
> static void f2fs_write_end_io(struct bio *bio)
> @@ -1022,16 +982,9 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> up_write(&io->io_rwsem);
> }
>
> -static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
> -{
> - return fsverity_active(inode) &&
> - idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
> -}
> -
> static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> unsigned nr_pages, unsigned op_flag,
> - pgoff_t first_idx, bool for_write,
> - bool for_verity)
> + pgoff_t first_idx, bool for_write)
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> struct bio *bio;
> @@ -1050,13 +1003,19 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
>
> if (fscrypt_inode_uses_fs_layer_crypto(inode))
> - post_read_steps |= 1 << STEP_DECRYPT;
> - if (f2fs_compressed_file(inode))
> - post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
> - if (for_verity && f2fs_need_verity(inode, first_idx))
> - post_read_steps |= 1 << STEP_VERITY;
> + post_read_steps |= STEP_DECRYPT;
> +
> + if (f2fs_need_verity(inode, first_idx))
> + post_read_steps |= STEP_VERITY;
>
> - if (post_read_steps) {
> + /*
> + * STEP_DECOMPRESS is handled specially, since a compressed file might
> + * contain both compressed and uncompressed clusters. We'll allocate a
> + * bio_post_read_ctx if the file is compressed, but the caller is
> + * responsible for enabling STEP_DECOMPRESS if it's actually needed.
> + */
> +
> + if (post_read_steps || f2fs_compressed_file(inode)) {
> /* Due to the mempool, this never fails. */
> ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
> ctx->bio = bio;
> @@ -1068,13 +1027,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> return bio;
> }
>
> -static void f2fs_release_read_bio(struct bio *bio)
> -{
> - if (bio->bi_private)
> - mempool_free(bio->bi_private, bio_post_read_ctx_pool);
> - bio_put(bio);
> -}
> -
> /* This can handle encryption stuffs */
> static int f2fs_submit_page_read(struct inode *inode, struct page *page,
> block_t blkaddr, int op_flags, bool for_write)
> @@ -1083,7 +1035,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
> struct bio *bio;
>
> bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
> - page->index, for_write, true);
> + page->index, for_write);
> if (IS_ERR(bio))
> return PTR_ERR(bio);
>
> @@ -2121,7 +2073,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
> if (bio == NULL) {
> bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0, page->index,
> - false, true);
> + false);
> if (IS_ERR(bio)) {
> ret = PTR_ERR(bio);
> bio = NULL;
> @@ -2167,8 +2119,6 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> sector_t last_block_in_file;
> const unsigned blocksize = blks_to_bytes(inode, 1);
> struct decompress_io_ctx *dic = NULL;
> - struct bio_post_read_ctx *ctx;
> - bool for_verity = false;
> int i;
> int ret = 0;
>
> @@ -2234,29 +2184,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> goto out_put_dnode;
> }
>
> - /*
> - * It's possible to enable fsverity on the fly when handling a cluster,
> - * which requires complicated error handling. Instead of adding more
> - * complexity, let's give a rule where end_io post-processes fsverity
> - * per cluster. In order to do that, we need to submit bio, if previous
> - * bio sets a different post-process policy.
> - */
> - if (fsverity_active(cc->inode)) {
> - atomic_set(&dic->verity_pages, cc->nr_cpages);
> - for_verity = true;
> -
> - if (bio) {
> - ctx = bio->bi_private;
> - if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
> - __submit_bio(sbi, bio, DATA);
> - bio = NULL;
> - }
> - }
> - }
> -
> for (i = 0; i < dic->nr_cpages; i++) {
> struct page *page = dic->cpages[i];
> block_t blkaddr;
> + struct bio_post_read_ctx *ctx;
>
> blkaddr = data_blkaddr(dn.inode, dn.node_page,
> dn.ofs_in_node + i + 1);
> @@ -2272,31 +2203,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> if (!bio) {
> bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0,
> - page->index, for_write, for_verity);
> + page->index, for_write);
> if (IS_ERR(bio)) {
> - unsigned int remained = dic->nr_cpages - i;
> - bool release = false;
> -
> ret = PTR_ERR(bio);
> - dic->failed = true;
> -
> - if (for_verity) {
> - if (!atomic_sub_return(remained,
> - &dic->verity_pages))
> - release = true;
> - } else {
> - if (!atomic_sub_return(remained,
> - &dic->pending_pages))
> - release = true;
> - }
> -
> - if (release) {
> - f2fs_decompress_end_io(dic->rpages,
> - cc->cluster_size, true,
> - false);
> - f2fs_free_dic(dic);
> - }
> -
> + f2fs_decompress_end_io(dic, ret);
> f2fs_put_dnode(&dn);
> *bio_ret = NULL;
> return ret;
> @@ -2308,10 +2218,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> if (bio_add_page(bio, page, blocksize, 0) < blocksize)
> goto submit_and_realloc;
>
> - /* tag STEP_DECOMPRESS to handle IO in wq */
> ctx = bio->bi_private;
> - if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
> - ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
> + ctx->enabled_steps |= STEP_DECOMPRESS;
> + refcount_inc(&dic->refcnt);
>
> inc_page_count(sbi, F2FS_RD_DATA);
> f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
> @@ -2328,7 +2237,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> out_put_dnode:
> f2fs_put_dnode(&dn);
> out:
> - f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
> + for (i = 0; i < cc->cluster_size; i++) {
> + if (cc->rpages[i]) {
> + ClearPageUptodate(cc->rpages[i]);
> + ClearPageError(cc->rpages[i]);
> + unlock_page(cc->rpages[i]);
> + }
> + }
> *bio_ret = bio;
> return ret;
> }
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index bb11759191dcc..ed2ce437357c2 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1337,7 +1337,7 @@ struct compress_io_ctx {
> atomic_t pending_pages; /* in-flight compressed page count */
> };
>
> -/* decompress io context for read IO path */
> +/* Context for decompressing one cluster on the read IO path */
> struct decompress_io_ctx {
> u32 magic; /* magic number to indicate page is compressed */
> struct inode *inode; /* inode the context belong to */
> @@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
> struct compress_data *cbuf; /* virtual mapped address on cpages */
> size_t rlen; /* valid data length in rbuf */
> size_t clen; /* valid data length in cbuf */
> - atomic_t pending_pages; /* in-flight compressed page count */
> - atomic_t verity_pages; /* in-flight page count for verity */
> - bool failed; /* indicate IO error during decompression */
> + atomic_t remaining_pages; /* number of compressed pages remaining to be read */
> + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
Now, we use .remaining_pages to control to trigger cluster decompression;
and .refcnt to control to release dic structure.
How about adding a bit more description about above info for better
readability?
> + bool failed; /* IO error occurred before decompression? */
> + bool need_verity; /* need fs-verity verification after decompression? */
> void *private; /* payload buffer for specified decompression algorithm */
> void *private2; /* extra payload buffer */
> + struct work_struct verity_work; /* work to verify the decompressed pages */
> };
>
> #define NULL_CLUSTER ((unsigned int)(~0))
> @@ -3876,7 +3878,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
> bool f2fs_is_compress_backend_ready(struct inode *inode);
> int f2fs_init_compress_mempool(void);
> void f2fs_destroy_compress_mempool(void);
> -void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity);
> +void f2fs_end_read_compressed_page(struct page *page, bool failed);
> bool f2fs_cluster_is_empty(struct compress_ctx *cc);
> bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
> void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
> @@ -3889,9 +3891,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> unsigned nr_pages, sector_t *last_block_in_bio,
> bool is_readahead, bool for_write);
> struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
> -void f2fs_free_dic(struct decompress_io_ctx *dic);
> -void f2fs_decompress_end_io(struct page **rpages,
> - unsigned int cluster_size, bool err, bool verity);
> +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
> +void f2fs_put_page_decompress_io_ctx(struct page *page);
> int f2fs_init_compress_ctx(struct compress_ctx *cc);
> void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
> void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
> @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
> }
> static inline int f2fs_init_compress_mempool(void) { return 0; }
> static inline void f2fs_destroy_compress_mempool(void) { }
> +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
> +{
> + WARN_ON_ONCE(1);
> +}
> +static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
f2fs_put_page_in_dic() or f2fs_put_dic_page()?
> +{
> + WARN_ON_ONCE(1);
> +}
> static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
> static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
> static inline int __init f2fs_init_compress_cache(void) { return 0; }
> @@ -4114,6 +4123,12 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
> return false;
> }
>
> +static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
> +{
> + return fsverity_active(inode) &&
> + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
> +}
> +
> #ifdef CONFIG_F2FS_FAULT_INJECTION
> extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> unsigned int type);
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index 56b113e3cd6aa..9e2981733ea4a 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
> TP_ARGS(inode, cluster_idx, cluster_size, algtype)
> );
>
> -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
> +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
I suggest keeping original tracepoint name, it can avoid breaking userspace
binary or script.
Thanks,
>
> TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
> unsigned int cluster_size, unsigned char algtype),
> @@ -1810,7 +1810,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end,
> TP_ARGS(inode, cluster_idx, compressed_size, ret)
> );
>
> -DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
> +DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_cluster_end,
>
> TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
> unsigned int compressed_size, int ret),
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
@ 2021-01-04 8:43 ` Chao Yu
0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2021-01-04 8:43 UTC (permalink / raw)
To: Eric Biggers, linux-f2fs-devel; +Cc: linux-fscrypt
Hi Eric,
On 2021/1/4 11:45, Eric Biggers wrote:
> That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
> it's actually needed.
Yup, now I see.
Some comments as below.
On 2020/12/29 7:26, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> Rework the post-read processing logic to be much easier to understand.
>
> At least one bug is fixed by this: if an I/O error occurred when reading
> from disk, decryption and verity would be performed on the uninitialized
> data, causing misleading messages in the kernel log.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>
> v2: rebased onto v5.11-rc1.
>
> fs/f2fs/compress.c | 159 +++++++++++-----
> fs/f2fs/data.c | 349 ++++++++++++++----------------------
> fs/f2fs/f2fs.h | 31 +++-
> include/trace/events/f2fs.h | 4 +-
> 4 files changed, 271 insertions(+), 272 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 4bcbacfe33259..66888b108f400 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -721,38 +721,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
> return ret;
> }
>
> -void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
> +static void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
> {
> - struct decompress_io_ctx *dic =
> - (struct decompress_io_ctx *)page_private(page);
> struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
> - struct f2fs_inode_info *fi= F2FS_I(dic->inode);
> + struct f2fs_inode_info *fi = F2FS_I(dic->inode);
> const struct f2fs_compress_ops *cops =
> f2fs_cops[fi->i_compress_algorithm];
> int ret;
> int i;
>
> - dec_page_count(sbi, F2FS_RD_DATA);
> -
> - if (bio->bi_status || PageError(page))
> - dic->failed = true;
> -
> - if (atomic_dec_return(&dic->pending_pages))
> - return;
> -
> - trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
> - dic->cluster_size, fi->i_compress_algorithm);
> + trace_f2fs_decompress_cluster_start(dic->inode, dic->cluster_idx,
> + dic->cluster_size,
> + fi->i_compress_algorithm);
>
> - /* submit partial compressed pages */
> if (dic->failed) {
> ret = -EIO;
> - goto out_free_dic;
> + goto out_end_io;
> }
>
> dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
> if (!dic->tpages) {
> ret = -ENOMEM;
> - goto out_free_dic;
> + goto out_end_io;
> }
>
> for (i = 0; i < dic->cluster_size; i++) {
> @@ -764,20 +754,20 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
> dic->tpages[i] = f2fs_compress_alloc_page();
> if (!dic->tpages[i]) {
> ret = -ENOMEM;
> - goto out_free_dic;
> + goto out_end_io;
> }
> }
>
> if (cops->init_decompress_ctx) {
> ret = cops->init_decompress_ctx(dic);
> if (ret)
> - goto out_free_dic;
> + goto out_end_io;
> }
>
> dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
> if (!dic->rbuf) {
> ret = -ENOMEM;
> - goto destroy_decompress_ctx;
> + goto out_destroy_decompress_ctx;
> }
>
> dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
> @@ -816,18 +806,34 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
> vm_unmap_ram(dic->cbuf, dic->nr_cpages);
> out_vunmap_rbuf:
> vm_unmap_ram(dic->rbuf, dic->cluster_size);
> -destroy_decompress_ctx:
> +out_destroy_decompress_ctx:
> if (cops->destroy_decompress_ctx)
> cops->destroy_decompress_ctx(dic);
> -out_free_dic:
> - if (!verity)
> - f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
> - ret, false);
> -
> - trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx,
> - dic->clen, ret);
> - if (!verity)
> - f2fs_free_dic(dic);
> +out_end_io:
> + trace_f2fs_decompress_cluster_end(dic->inode, dic->cluster_idx,
> + dic->clen, ret);
> + f2fs_decompress_end_io(dic, ret);
> +}
> +
> +/*
> + * This is called when a page of a compressed cluster has been read from disk
> + * (or failed to be read from disk). It checks whether this page was the last
> + * page being waited on in the cluster, and if so, it decompresses the cluster
> + * (or in the case of a failure, cleans up without actually decompressing).
> + */
> +void f2fs_end_read_compressed_page(struct page *page, bool failed)
> +{
> + struct decompress_io_ctx *dic =
> + (struct decompress_io_ctx *)page_private(page);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
> +
> + dec_page_count(sbi, F2FS_RD_DATA);
> +
> + if (failed)
> + WRITE_ONCE(dic->failed, true);
> +
> + if (atomic_dec_and_test(&dic->remaining_pages))
> + f2fs_decompress_cluster(dic);
> }
>
> static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index)
> @@ -1494,6 +1500,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
> return err;
> }
>
> +static void f2fs_free_dic(struct decompress_io_ctx *dic);
> +
> struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
> {
> struct decompress_io_ctx *dic;
> @@ -1512,12 +1520,14 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
>
> dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
> dic->inode = cc->inode;
> - atomic_set(&dic->pending_pages, cc->nr_cpages);
> + atomic_set(&dic->remaining_pages, cc->nr_cpages);
> dic->cluster_idx = cc->cluster_idx;
> dic->cluster_size = cc->cluster_size;
> dic->log_cluster_size = cc->log_cluster_size;
> dic->nr_cpages = cc->nr_cpages;
> + refcount_set(&dic->refcnt, 1);
> dic->failed = false;
> + dic->need_verity = f2fs_need_verity(cc->inode, start_idx);
>
> for (i = 0; i < dic->cluster_size; i++)
> dic->rpages[i] = cc->rpages[i];
> @@ -1546,7 +1556,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
> return ERR_PTR(-ENOMEM);
> }
>
> -void f2fs_free_dic(struct decompress_io_ctx *dic)
> +static void f2fs_free_dic(struct decompress_io_ctx *dic)
> {
> int i;
>
> @@ -1574,30 +1584,89 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
> kmem_cache_free(dic_entry_slab, dic);
> }
>
> -void f2fs_decompress_end_io(struct page **rpages,
> - unsigned int cluster_size, bool err, bool verity)
> +static void f2fs_put_dic(struct decompress_io_ctx *dic)
> +{
> + if (refcount_dec_and_test(&dic->refcnt))
> + f2fs_free_dic(dic);
> +}
> +
> +/*
> + * Update and unlock the cluster's decompressed pagecache pages, and release the
> + * reference to the decompress_io_ctx that was taken for decompression itself.
> + */
> +static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
> {
> int i;
>
> - for (i = 0; i < cluster_size; i++) {
> - struct page *rpage = rpages[i];
> + for (i = 0; i < dic->cluster_size; i++) {
> + struct page *rpage = dic->rpages[i];
>
> if (!rpage)
> continue;
>
> - if (err || PageError(rpage))
> - goto clear_uptodate;
> -
> - if (!verity || fsverity_verify_page(rpage)) {
> + /* PG_error was set if verity failed. */
> + if (failed || PageError(rpage)) {
> + ClearPageUptodate(rpage);
> + /* will re-read again later */
> + ClearPageError(rpage);
> + } else {
> SetPageUptodate(rpage);
> - goto unlock;
> }
> -clear_uptodate:
> - ClearPageUptodate(rpage);
> - ClearPageError(rpage);
> -unlock:
> unlock_page(rpage);
> }
> +
> + f2fs_put_dic(dic);
> +}
> +
> +static void f2fs_verify_cluster(struct work_struct *work)
> +{
> + struct decompress_io_ctx *dic =
> + container_of(work, struct decompress_io_ctx, verity_work);
> + int i;
> +
> + /* Verify the cluster's decompressed pages with fs-verity. */
> + for (i = 0; i < dic->cluster_size; i++) {
> + struct page *rpage = dic->rpages[i];
> +
> + if (rpage && !fsverity_verify_page(rpage))
> + SetPageError(rpage);
> + }
> +
> + __f2fs_decompress_end_io(dic, false);
> +}
> +
> +/*
> + * This is called when a compressed cluster has been decompressed
> + * (or failed to be read and/or decompressed).
> + */
> +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
> +{
> + if (!failed && dic->need_verity) {
> + /*
> + * Note that to avoid deadlocks, the verity work can't be done
> + * on the decompression workqueue. This is because verifying
> + * the data pages can involve reading metadata pages from the
> + * file, and these metadata pages may be compressed.
> + */
> + INIT_WORK(&dic->verity_work, f2fs_verify_cluster);
> + fsverity_enqueue_verify_work(&dic->verity_work);
> + } else {
> + __f2fs_decompress_end_io(dic, failed);
> + }
> +}
> +
> +/*
> + * Put a reference to the decompression context held by a compressed page in a
> + * bio. We needed this reference in order to keep the compressed pages around
> + * until the bio(s) that contain them have been freed; sometimes that doesn't
> + * happen until after the decompression has finished.
> + */
> +void f2fs_put_page_decompress_io_ctx(struct page *page)
> +{
> + struct decompress_io_ctx *dic =
> + (struct decompress_io_ctx *)page_private(page);
> +
> + f2fs_put_dic(dic);
> }
>
> int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index aa34d620bec98..d4e86639707f4 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -115,10 +115,21 @@ static enum count_type __read_io_type(struct page *page)
>
> /* postprocessing steps for read bios */
> enum bio_post_read_step {
> - STEP_DECRYPT,
> - STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
> - STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
> - STEP_VERITY,
> +#ifdef CONFIG_FS_ENCRYPTION
> + STEP_DECRYPT = 1 << 0,
> +#else
> + STEP_DECRYPT = 0, /* compile out the decryption-related code */
> +#endif
> +#ifdef CONFIG_F2FS_FS_COMPRESSION
> + STEP_DECOMPRESS = 1 << 1,
> +#else
> + STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
> +#endif
> +#ifdef CONFIG_FS_VERITY
> + STEP_VERITY = 1 << 2,
> +#else
> + STEP_VERITY = 0, /* compile out the verity-related code */
> +#endif
> };
>
> struct bio_post_read_ctx {
> @@ -128,25 +139,26 @@ struct bio_post_read_ctx {
> unsigned int enabled_steps;
> };
>
> -static void __read_end_io(struct bio *bio, bool compr, bool verity)
> +static void f2fs_finish_read_bio(struct bio *bio)
> {
> - struct page *page;
> struct bio_vec *bv;
> struct bvec_iter_all iter_all;
>
> + /*
> + * Update and unlock the bio's pagecache pages, and put the
> + * decompression context for any compressed pages.
> + */
> bio_for_each_segment_all(bv, bio, iter_all) {
> - page = bv->bv_page;
> + struct page *page = bv->bv_page;
>
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> - if (compr && f2fs_is_compressed_page(page)) {
> - f2fs_decompress_pages(bio, page, verity);
> + if (f2fs_is_compressed_page(page)) {
> + if (bio->bi_status)
> + f2fs_end_read_compressed_page(page, true);
> + f2fs_put_page_decompress_io_ctx(page);
> continue;
> }
> - if (verity)
> - continue;
> -#endif
>
> - /* PG_error was set if any post_read step failed */
> + /* PG_error was set if decryption or verity failed. */
> if (bio->bi_status || PageError(page)) {
> ClearPageUptodate(page);
> /* will re-read again later */
> @@ -157,181 +169,129 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity)
> dec_page_count(F2FS_P_SB(page), __read_io_type(page));
> unlock_page(page);
> }
> -}
> -
> -static void f2fs_release_read_bio(struct bio *bio);
> -static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
> -{
> - if (!compr)
> - __read_end_io(bio, false, verity);
> - f2fs_release_read_bio(bio);
> -}
> -
> -static void f2fs_decompress_bio(struct bio *bio, bool verity)
> -{
> - __read_end_io(bio, true, verity);
> -}
> -
> -static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
> -
> -static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
> -{
> - fscrypt_decrypt_bio(ctx->bio);
> -}
> -
> -static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
> -{
> - f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
> -}
> -
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> -static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
> -{
> - f2fs_decompress_end_io(rpages, cluster_size, false, true);
> -}
> -
> -static void f2fs_verify_bio(struct bio *bio)
> -{
> - struct bio_vec *bv;
> - struct bvec_iter_all iter_all;
>
> - bio_for_each_segment_all(bv, bio, iter_all) {
> - struct page *page = bv->bv_page;
> - struct decompress_io_ctx *dic;
> -
> - dic = (struct decompress_io_ctx *)page_private(page);
> -
> - if (dic) {
> - if (atomic_dec_return(&dic->verity_pages))
> - continue;
> - f2fs_verify_pages(dic->rpages,
> - dic->cluster_size);
> - f2fs_free_dic(dic);
> - continue;
> - }
> -
> - if (bio->bi_status || PageError(page))
> - goto clear_uptodate;
> -
> - if (fsverity_verify_page(page)) {
> - SetPageUptodate(page);
> - goto unlock;
> - }
> -clear_uptodate:
> - ClearPageUptodate(page);
> - ClearPageError(page);
> -unlock:
> - dec_page_count(F2FS_P_SB(page), __read_io_type(page));
> - unlock_page(page);
> - }
> + if (bio->bi_private)
> + mempool_free(bio->bi_private, bio_post_read_ctx_pool);
> + bio_put(bio);
> }
> -#endif
>
> -static void f2fs_verity_work(struct work_struct *work)
> +static void f2fs_verify_bio(struct work_struct *work)
> {
> struct bio_post_read_ctx *ctx =
> container_of(work, struct bio_post_read_ctx, work);
> struct bio *bio = ctx->bio;
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> - unsigned int enabled_steps = ctx->enabled_steps;
> -#endif
> + bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
>
> /*
> * fsverity_verify_bio() may call readpages() again, and while verity
> - * will be disabled for this, decryption may still be needed, resulting
> - * in another bio_post_read_ctx being allocated. So to prevent
> - * deadlocks we need to release the current ctx to the mempool first.
> - * This assumes that verity is the last post-read step.
> + * will be disabled for this, decryption and/or decompression may still
> + * be needed, resulting in another bio_post_read_ctx being allocated.
> + * So to prevent deadlocks we need to release the current ctx to the
> + * mempool first. This assumes that verity is the last post-read step.
> */
> mempool_free(ctx, bio_post_read_ctx_pool);
> bio->bi_private = NULL;
>
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> - /* previous step is decompression */
> - if (enabled_steps & (1 << STEP_DECOMPRESS)) {
> - f2fs_verify_bio(bio);
> - f2fs_release_read_bio(bio);
> - return;
> + /*
> + * Verify the bio's pages with fs-verity. Exclude compressed pages,
> + * as those were handled separately by f2fs_end_read_compressed_page().
> + */
> + if (may_have_compressed_pages) {
> + struct bio_vec *bv;
> + struct bvec_iter_all iter_all;
> +
> + bio_for_each_segment_all(bv, bio, iter_all) {
> + struct page *page = bv->bv_page;
> +
> + if (!f2fs_is_compressed_page(page) &&
> + !PageError(page) && !fsverity_verify_page(page))
> + SetPageError(page);
> + }
> + } else {
> + fsverity_verify_bio(bio);
> }
> -#endif
>
> - fsverity_verify_bio(bio);
> - __f2fs_read_end_io(bio, false, false);
> + f2fs_finish_read_bio(bio);
> }
>
> -static void f2fs_post_read_work(struct work_struct *work)
> +/*
> + * If the bio's data needs to be verified with fs-verity, then enqueue the
> + * verity work for the bio. Otherwise finish the bio now.
> + *
> + * Note that to avoid deadlocks, the verity work can't be done on the
> + * decryption/decompression workqueue. This is because verifying the data pages
> + * can involve reading verity metadata pages from the file, and these verity
> + * metadata pages may be encrypted and/or compressed.
> + */
> +static void f2fs_verify_and_finish_bio(struct bio *bio)
> {
> - struct bio_post_read_ctx *ctx =
> - container_of(work, struct bio_post_read_ctx, work);
> -
> - if (ctx->enabled_steps & (1 << STEP_DECRYPT))
> - f2fs_decrypt_work(ctx);
> + struct bio_post_read_ctx *ctx = bio->bi_private;
>
> - if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
> - f2fs_decompress_work(ctx);
> -
> - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
> - INIT_WORK(&ctx->work, f2fs_verity_work);
> + if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
> + INIT_WORK(&ctx->work, f2fs_verify_bio);
> fsverity_enqueue_verify_work(&ctx->work);
> - return;
> + } else {
> + f2fs_finish_read_bio(bio);
> }
> -
> - __f2fs_read_end_io(ctx->bio,
> - ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
> }
>
> -static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
> - struct work_struct *work)
> +static void f2fs_post_read_work(struct work_struct *work)
> {
> - queue_work(sbi->post_read_wq, work);
> -}
> + struct bio_post_read_ctx *ctx =
> + container_of(work, struct bio_post_read_ctx, work);
> + struct bio *bio = ctx->bio;
>
> -static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
> -{
> - /*
> - * We use different work queues for decryption and for verity because
> - * verity may require reading metadata pages that need decryption, and
> - * we shouldn't recurse to the same workqueue.
> - */
> + if (ctx->enabled_steps & STEP_DECRYPT)
> + fscrypt_decrypt_bio(bio);
>
> - if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
> - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
> - INIT_WORK(&ctx->work, f2fs_post_read_work);
> - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
> - return;
> - }
> + if (ctx->enabled_steps & STEP_DECOMPRESS) {
> + struct bio_vec *bv;
> + struct bvec_iter_all iter_all;
> + bool all_compressed = true;
>
> - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
> - INIT_WORK(&ctx->work, f2fs_verity_work);
> - fsverity_enqueue_verify_work(&ctx->work);
> - return;
> - }
> + bio_for_each_segment_all(bv, bio, iter_all) {
> + struct page *page = bv->bv_page;
> + /* PG_error will be set if decryption failed. */
> + bool failed = PageError(page);
>
> - __f2fs_read_end_io(ctx->bio, false, false);
> -}
> + if (f2fs_is_compressed_page(page))
> + f2fs_end_read_compressed_page(page, failed);
> + else
> + all_compressed = false;
> + }
> + /*
> + * Optimization: if all the bio's pages are compressed, then
> + * scheduling the per-bio verity work is unnecessary, as verity
> + * will be fully handled at the compression cluster level.
> + */
> + if (all_compressed)
> + ctx->enabled_steps &= ~STEP_VERITY;
> + }
Can we wrap above logic into a function for cleanup?
>
> -static bool f2fs_bio_post_read_required(struct bio *bio)
> -{
> - return bio->bi_private;
> + f2fs_verify_and_finish_bio(bio);
> }
>
> static void f2fs_read_end_io(struct bio *bio)
> {
> struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
> + struct bio_post_read_ctx *ctx = bio->bi_private;
>
> if (time_to_inject(sbi, FAULT_READ_IO)) {
> f2fs_show_injection_info(sbi, FAULT_READ_IO);
> bio->bi_status = BLK_STS_IOERR;
> }
>
> - if (f2fs_bio_post_read_required(bio)) {
> - struct bio_post_read_ctx *ctx = bio->bi_private;
> -
> - bio_post_read_processing(ctx);
> + if (bio->bi_status) {
> + f2fs_finish_read_bio(bio);
> return;
> }
>
> - __f2fs_read_end_io(bio, false, false);
> + if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> + INIT_WORK(&ctx->work, f2fs_post_read_work);
> + queue_work(ctx->sbi->post_read_wq, &ctx->work);
> + } else {
> + f2fs_verify_and_finish_bio(bio);
> + }
> }
>
> static void f2fs_write_end_io(struct bio *bio)
> @@ -1022,16 +982,9 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> up_write(&io->io_rwsem);
> }
>
> -static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
> -{
> - return fsverity_active(inode) &&
> - idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
> -}
> -
> static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> unsigned nr_pages, unsigned op_flag,
> - pgoff_t first_idx, bool for_write,
> - bool for_verity)
> + pgoff_t first_idx, bool for_write)
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> struct bio *bio;
> @@ -1050,13 +1003,19 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
>
> if (fscrypt_inode_uses_fs_layer_crypto(inode))
> - post_read_steps |= 1 << STEP_DECRYPT;
> - if (f2fs_compressed_file(inode))
> - post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
> - if (for_verity && f2fs_need_verity(inode, first_idx))
> - post_read_steps |= 1 << STEP_VERITY;
> + post_read_steps |= STEP_DECRYPT;
> +
> + if (f2fs_need_verity(inode, first_idx))
> + post_read_steps |= STEP_VERITY;
>
> - if (post_read_steps) {
> + /*
> + * STEP_DECOMPRESS is handled specially, since a compressed file might
> + * contain both compressed and uncompressed clusters. We'll allocate a
> + * bio_post_read_ctx if the file is compressed, but the caller is
> + * responsible for enabling STEP_DECOMPRESS if it's actually needed.
> + */
> +
> + if (post_read_steps || f2fs_compressed_file(inode)) {
> /* Due to the mempool, this never fails. */
> ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
> ctx->bio = bio;
> @@ -1068,13 +1027,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> return bio;
> }
>
> -static void f2fs_release_read_bio(struct bio *bio)
> -{
> - if (bio->bi_private)
> - mempool_free(bio->bi_private, bio_post_read_ctx_pool);
> - bio_put(bio);
> -}
> -
> /* This can handle encryption stuffs */
> static int f2fs_submit_page_read(struct inode *inode, struct page *page,
> block_t blkaddr, int op_flags, bool for_write)
> @@ -1083,7 +1035,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
> struct bio *bio;
>
> bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
> - page->index, for_write, true);
> + page->index, for_write);
> if (IS_ERR(bio))
> return PTR_ERR(bio);
>
> @@ -2121,7 +2073,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
> if (bio == NULL) {
> bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0, page->index,
> - false, true);
> + false);
> if (IS_ERR(bio)) {
> ret = PTR_ERR(bio);
> bio = NULL;
> @@ -2167,8 +2119,6 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> sector_t last_block_in_file;
> const unsigned blocksize = blks_to_bytes(inode, 1);
> struct decompress_io_ctx *dic = NULL;
> - struct bio_post_read_ctx *ctx;
> - bool for_verity = false;
> int i;
> int ret = 0;
>
> @@ -2234,29 +2184,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> goto out_put_dnode;
> }
>
> - /*
> - * It's possible to enable fsverity on the fly when handling a cluster,
> - * which requires complicated error handling. Instead of adding more
> - * complexity, let's give a rule where end_io post-processes fsverity
> - * per cluster. In order to do that, we need to submit bio, if previous
> - * bio sets a different post-process policy.
> - */
> - if (fsverity_active(cc->inode)) {
> - atomic_set(&dic->verity_pages, cc->nr_cpages);
> - for_verity = true;
> -
> - if (bio) {
> - ctx = bio->bi_private;
> - if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
> - __submit_bio(sbi, bio, DATA);
> - bio = NULL;
> - }
> - }
> - }
> -
> for (i = 0; i < dic->nr_cpages; i++) {
> struct page *page = dic->cpages[i];
> block_t blkaddr;
> + struct bio_post_read_ctx *ctx;
>
> blkaddr = data_blkaddr(dn.inode, dn.node_page,
> dn.ofs_in_node + i + 1);
> @@ -2272,31 +2203,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> if (!bio) {
> bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
> is_readahead ? REQ_RAHEAD : 0,
> - page->index, for_write, for_verity);
> + page->index, for_write);
> if (IS_ERR(bio)) {
> - unsigned int remained = dic->nr_cpages - i;
> - bool release = false;
> -
> ret = PTR_ERR(bio);
> - dic->failed = true;
> -
> - if (for_verity) {
> - if (!atomic_sub_return(remained,
> - &dic->verity_pages))
> - release = true;
> - } else {
> - if (!atomic_sub_return(remained,
> - &dic->pending_pages))
> - release = true;
> - }
> -
> - if (release) {
> - f2fs_decompress_end_io(dic->rpages,
> - cc->cluster_size, true,
> - false);
> - f2fs_free_dic(dic);
> - }
> -
> + f2fs_decompress_end_io(dic, ret);
> f2fs_put_dnode(&dn);
> *bio_ret = NULL;
> return ret;
> @@ -2308,10 +2218,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> if (bio_add_page(bio, page, blocksize, 0) < blocksize)
> goto submit_and_realloc;
>
> - /* tag STEP_DECOMPRESS to handle IO in wq */
> ctx = bio->bi_private;
> - if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
> - ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
> + ctx->enabled_steps |= STEP_DECOMPRESS;
> + refcount_inc(&dic->refcnt);
>
> inc_page_count(sbi, F2FS_RD_DATA);
> f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
> @@ -2328,7 +2237,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> out_put_dnode:
> f2fs_put_dnode(&dn);
> out:
> - f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
> + for (i = 0; i < cc->cluster_size; i++) {
> + if (cc->rpages[i]) {
> + ClearPageUptodate(cc->rpages[i]);
> + ClearPageError(cc->rpages[i]);
> + unlock_page(cc->rpages[i]);
> + }
> + }
> *bio_ret = bio;
> return ret;
> }
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index bb11759191dcc..ed2ce437357c2 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1337,7 +1337,7 @@ struct compress_io_ctx {
> atomic_t pending_pages; /* in-flight compressed page count */
> };
>
> -/* decompress io context for read IO path */
> +/* Context for decompressing one cluster on the read IO path */
> struct decompress_io_ctx {
> u32 magic; /* magic number to indicate page is compressed */
> struct inode *inode; /* inode the context belong to */
> @@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
> struct compress_data *cbuf; /* virtual mapped address on cpages */
> size_t rlen; /* valid data length in rbuf */
> size_t clen; /* valid data length in cbuf */
> - atomic_t pending_pages; /* in-flight compressed page count */
> - atomic_t verity_pages; /* in-flight page count for verity */
> - bool failed; /* indicate IO error during decompression */
> + atomic_t remaining_pages; /* number of compressed pages remaining to be read */
> + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
Now, we use .remaining_pages to control to trigger cluster decompression;
and .refcnt to control to release dic structure.
How about adding a bit more description about above info for better
readability?
> + bool failed; /* IO error occurred before decompression? */
> + bool need_verity; /* need fs-verity verification after decompression? */
> void *private; /* payload buffer for specified decompression algorithm */
> void *private2; /* extra payload buffer */
> + struct work_struct verity_work; /* work to verify the decompressed pages */
> };
>
> #define NULL_CLUSTER ((unsigned int)(~0))
> @@ -3876,7 +3878,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
> bool f2fs_is_compress_backend_ready(struct inode *inode);
> int f2fs_init_compress_mempool(void);
> void f2fs_destroy_compress_mempool(void);
> -void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity);
> +void f2fs_end_read_compressed_page(struct page *page, bool failed);
> bool f2fs_cluster_is_empty(struct compress_ctx *cc);
> bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
> void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
> @@ -3889,9 +3891,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> unsigned nr_pages, sector_t *last_block_in_bio,
> bool is_readahead, bool for_write);
> struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
> -void f2fs_free_dic(struct decompress_io_ctx *dic);
> -void f2fs_decompress_end_io(struct page **rpages,
> - unsigned int cluster_size, bool err, bool verity);
> +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
> +void f2fs_put_page_decompress_io_ctx(struct page *page);
> int f2fs_init_compress_ctx(struct compress_ctx *cc);
> void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
> void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
> @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
> }
> static inline int f2fs_init_compress_mempool(void) { return 0; }
> static inline void f2fs_destroy_compress_mempool(void) { }
> +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
> +{
> + WARN_ON_ONCE(1);
> +}
> +static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
f2fs_put_page_in_dic() or f2fs_put_dic_page()?
> +{
> + WARN_ON_ONCE(1);
> +}
> static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
> static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
> static inline int __init f2fs_init_compress_cache(void) { return 0; }
> @@ -4114,6 +4123,12 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
> return false;
> }
>
> +static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
> +{
> + return fsverity_active(inode) &&
> + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
> +}
> +
> #ifdef CONFIG_F2FS_FAULT_INJECTION
> extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> unsigned int type);
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index 56b113e3cd6aa..9e2981733ea4a 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
> TP_ARGS(inode, cluster_idx, cluster_size, algtype)
> );
>
> -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
> +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
I suggest keeping original tracepoint name, it can avoid breaking userspace
binary or script.
Thanks,
>
> TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
> unsigned int cluster_size, unsigned char algtype),
> @@ -1810,7 +1810,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end,
> TP_ARGS(inode, cluster_idx, compressed_size, ret)
> );
>
> -DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
> +DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_cluster_end,
>
> TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
> unsigned int compressed_size, int ret),
>
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
2021-01-04 8:43 ` Chao Yu
@ 2021-01-04 18:33 ` Eric Biggers
-1 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2021-01-04 18:33 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-f2fs-devel, linux-fscrypt
On Mon, Jan 04, 2021 at 04:43:56PM +0800, Chao Yu wrote:
> Hi Eric,
>
> On 2021/1/4 11:45, Eric Biggers wrote:
> > That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
> > it's actually needed.
>
> Yup, now I see.
>
> Some comments as below.
>
> On 2020/12/29 7:26, Eric Biggers wrote:
> > From: Eric Biggers <ebiggers@google.com>
> >
> > Rework the post-read processing logic to be much easier to understand.
> >
> > At least one bug is fixed by this: if an I/O error occurred when reading
> > from disk, decryption and verity would be performed on the uninitialized
> > data, causing misleading messages in the kernel log.
> >
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > ---
Please only quote the parts you're actually replying to.
> > +static void f2fs_post_read_work(struct work_struct *work)
> > {
> > - queue_work(sbi->post_read_wq, work);
> > -}
> > + struct bio_post_read_ctx *ctx =
> > + container_of(work, struct bio_post_read_ctx, work);
> > + struct bio *bio = ctx->bio;
> > -static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
> > -{
> > - /*
> > - * We use different work queues for decryption and for verity because
> > - * verity may require reading metadata pages that need decryption, and
> > - * we shouldn't recurse to the same workqueue.
> > - */
> > + if (ctx->enabled_steps & STEP_DECRYPT)
> > + fscrypt_decrypt_bio(bio);
> > - if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
> > - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
> > - INIT_WORK(&ctx->work, f2fs_post_read_work);
> > - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
> > - return;
> > - }
> > + if (ctx->enabled_steps & STEP_DECOMPRESS) {
> > + struct bio_vec *bv;
> > + struct bvec_iter_all iter_all;
> > + bool all_compressed = true;
> > - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
> > - INIT_WORK(&ctx->work, f2fs_verity_work);
> > - fsverity_enqueue_verify_work(&ctx->work);
> > - return;
> > - }
> > + bio_for_each_segment_all(bv, bio, iter_all) {
> > + struct page *page = bv->bv_page;
> > + /* PG_error will be set if decryption failed. */
> > + bool failed = PageError(page);
> > - __f2fs_read_end_io(ctx->bio, false, false);
> > -}
> > + if (f2fs_is_compressed_page(page))
> > + f2fs_end_read_compressed_page(page, failed);
> > + else
> > + all_compressed = false;
> > + }
> > + /*
> > + * Optimization: if all the bio's pages are compressed, then
> > + * scheduling the per-bio verity work is unnecessary, as verity
> > + * will be fully handled at the compression cluster level.
> > + */
> > + if (all_compressed)
> > + ctx->enabled_steps &= ~STEP_VERITY;
> > + }
>
> Can we wrap above logic into a function for cleanup?
Are you saying you want the STEP_DECOMPRESS handling in a new function, e.g.
f2fs_handle_step_decompress()? I could do that, though this new function would
only be called from f2fs_post_read_work(), which isn't too long. So I'm not
sure it would be better.
> > +/* Context for decompressing one cluster on the read IO path */
> > struct decompress_io_ctx {
> > u32 magic; /* magic number to indicate page is compressed */
> > struct inode *inode; /* inode the context belong to */
> > @@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
> > struct compress_data *cbuf; /* virtual mapped address on cpages */
> > size_t rlen; /* valid data length in rbuf */
> > size_t clen; /* valid data length in cbuf */
> > - atomic_t pending_pages; /* in-flight compressed page count */
> > - atomic_t verity_pages; /* in-flight page count for verity */
> > - bool failed; /* indicate IO error during decompression */
> > + atomic_t remaining_pages; /* number of compressed pages remaining to be read */
> > + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
>
> Now, we use .remaining_pages to control to trigger cluster decompression;
> and .refcnt to control to release dic structure.
>
> How about adding a bit more description about above info for better
> readability?
Would you like longer comments even though every other field in this struct has
a 1-line comment?
> > -void f2fs_free_dic(struct decompress_io_ctx *dic);
> > -void f2fs_decompress_end_io(struct page **rpages,
> > - unsigned int cluster_size, bool err, bool verity);
> > +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
> > +void f2fs_put_page_decompress_io_ctx(struct page *page);
> > int f2fs_init_compress_ctx(struct compress_ctx *cc);
> > void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
> > void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
> > @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
> > }
> > static inline int f2fs_init_compress_mempool(void) { return 0; }
> > static inline void f2fs_destroy_compress_mempool(void) { }
> > +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
> > +{
> > + WARN_ON_ONCE(1);
> > +}
> > +static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
>
> f2fs_put_page_in_dic() or f2fs_put_dic_page()?
It's putting the decompression context of the page, not the page itself. So I
feel the name I've proposed makes more sense.
> > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> > index 56b113e3cd6aa..9e2981733ea4a 100644
> > --- a/include/trace/events/f2fs.h
> > +++ b/include/trace/events/f2fs.h
> > @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
> > TP_ARGS(inode, cluster_idx, cluster_size, algtype)
> > );
> > -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
> > +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
>
> I suggest keeping original tracepoint name, it can avoid breaking userspace
> binary or script.
>
Tracepoints aren't a stable UAPI, and the new name is more logical because it
describes what is being decompressed rather than an implementation detail of
where the data is located (in pages).
- Eric
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
@ 2021-01-04 18:33 ` Eric Biggers
0 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2021-01-04 18:33 UTC (permalink / raw)
To: Chao Yu; +Cc: linux-fscrypt, linux-f2fs-devel
On Mon, Jan 04, 2021 at 04:43:56PM +0800, Chao Yu wrote:
> Hi Eric,
>
> On 2021/1/4 11:45, Eric Biggers wrote:
> > That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
> > it's actually needed.
>
> Yup, now I see.
>
> Some comments as below.
>
> On 2020/12/29 7:26, Eric Biggers wrote:
> > From: Eric Biggers <ebiggers@google.com>
> >
> > Rework the post-read processing logic to be much easier to understand.
> >
> > At least one bug is fixed by this: if an I/O error occurred when reading
> > from disk, decryption and verity would be performed on the uninitialized
> > data, causing misleading messages in the kernel log.
> >
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > ---
Please only quote the parts you're actually replying to.
> > +static void f2fs_post_read_work(struct work_struct *work)
> > {
> > - queue_work(sbi->post_read_wq, work);
> > -}
> > + struct bio_post_read_ctx *ctx =
> > + container_of(work, struct bio_post_read_ctx, work);
> > + struct bio *bio = ctx->bio;
> > -static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
> > -{
> > - /*
> > - * We use different work queues for decryption and for verity because
> > - * verity may require reading metadata pages that need decryption, and
> > - * we shouldn't recurse to the same workqueue.
> > - */
> > + if (ctx->enabled_steps & STEP_DECRYPT)
> > + fscrypt_decrypt_bio(bio);
> > - if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
> > - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
> > - INIT_WORK(&ctx->work, f2fs_post_read_work);
> > - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
> > - return;
> > - }
> > + if (ctx->enabled_steps & STEP_DECOMPRESS) {
> > + struct bio_vec *bv;
> > + struct bvec_iter_all iter_all;
> > + bool all_compressed = true;
> > - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
> > - INIT_WORK(&ctx->work, f2fs_verity_work);
> > - fsverity_enqueue_verify_work(&ctx->work);
> > - return;
> > - }
> > + bio_for_each_segment_all(bv, bio, iter_all) {
> > + struct page *page = bv->bv_page;
> > + /* PG_error will be set if decryption failed. */
> > + bool failed = PageError(page);
> > - __f2fs_read_end_io(ctx->bio, false, false);
> > -}
> > + if (f2fs_is_compressed_page(page))
> > + f2fs_end_read_compressed_page(page, failed);
> > + else
> > + all_compressed = false;
> > + }
> > + /*
> > + * Optimization: if all the bio's pages are compressed, then
> > + * scheduling the per-bio verity work is unnecessary, as verity
> > + * will be fully handled at the compression cluster level.
> > + */
> > + if (all_compressed)
> > + ctx->enabled_steps &= ~STEP_VERITY;
> > + }
>
> Can we wrap above logic into a function for cleanup?
Are you saying you want the STEP_DECOMPRESS handling in a new function, e.g.
f2fs_handle_step_decompress()? I could do that, though this new function would
only be called from f2fs_post_read_work(), which isn't too long. So I'm not
sure it would be better.
> > +/* Context for decompressing one cluster on the read IO path */
> > struct decompress_io_ctx {
> > u32 magic; /* magic number to indicate page is compressed */
> > struct inode *inode; /* inode the context belong to */
> > @@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
> > struct compress_data *cbuf; /* virtual mapped address on cpages */
> > size_t rlen; /* valid data length in rbuf */
> > size_t clen; /* valid data length in cbuf */
> > - atomic_t pending_pages; /* in-flight compressed page count */
> > - atomic_t verity_pages; /* in-flight page count for verity */
> > - bool failed; /* indicate IO error during decompression */
> > + atomic_t remaining_pages; /* number of compressed pages remaining to be read */
> > + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
>
> Now, we use .remaining_pages to control to trigger cluster decompression;
> and .refcnt to control to release dic structure.
>
> How about adding a bit more description about above info for better
> readability?
Would you like longer comments even though every other field in this struct has
a 1-line comment?
> > -void f2fs_free_dic(struct decompress_io_ctx *dic);
> > -void f2fs_decompress_end_io(struct page **rpages,
> > - unsigned int cluster_size, bool err, bool verity);
> > +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
> > +void f2fs_put_page_decompress_io_ctx(struct page *page);
> > int f2fs_init_compress_ctx(struct compress_ctx *cc);
> > void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
> > void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
> > @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
> > }
> > static inline int f2fs_init_compress_mempool(void) { return 0; }
> > static inline void f2fs_destroy_compress_mempool(void) { }
> > +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
> > +{
> > + WARN_ON_ONCE(1);
> > +}
> > +static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
>
> f2fs_put_page_in_dic() or f2fs_put_dic_page()?
It's putting the decompression context of the page, not the page itself. So I
feel the name I've proposed makes more sense.
> > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> > index 56b113e3cd6aa..9e2981733ea4a 100644
> > --- a/include/trace/events/f2fs.h
> > +++ b/include/trace/events/f2fs.h
> > @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
> > TP_ARGS(inode, cluster_idx, cluster_size, algtype)
> > );
> > -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
> > +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
>
> I suggest keeping original tracepoint name, it can avoid breaking userspace
> binary or script.
>
Tracepoints aren't a stable UAPI, and the new name is more logical because it
describes what is being decompressed rather than an implementation detail of
where the data is located (in pages).
- Eric
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
2021-01-04 18:33 ` Eric Biggers
@ 2021-01-05 1:26 ` Chao Yu
-1 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2021-01-05 1:26 UTC (permalink / raw)
To: Eric Biggers; +Cc: linux-f2fs-devel, linux-fscrypt
On 2021/1/5 2:33, Eric Biggers wrote:
> On Mon, Jan 04, 2021 at 04:43:56PM +0800, Chao Yu wrote:
>> Hi Eric,
>>
>> On 2021/1/4 11:45, Eric Biggers wrote:
>>> That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
>>> it's actually needed.
>>
>> Yup, now I see.
>>
>> Some comments as below.
>>
>> On 2020/12/29 7:26, Eric Biggers wrote:
>>> From: Eric Biggers <ebiggers@google.com>
>>>
>>> Rework the post-read processing logic to be much easier to understand.
>>>
>>> At least one bug is fixed by this: if an I/O error occurred when reading
>>> from disk, decryption and verity would be performed on the uninitialized
>>> data, causing misleading messages in the kernel log.
>>>
>>> Signed-off-by: Eric Biggers <ebiggers@google.com>
>>> ---
>
> Please only quote the parts you're actually replying to.
>
>>> +static void f2fs_post_read_work(struct work_struct *work)
>>> {
>>> - queue_work(sbi->post_read_wq, work);
>>> -}
>>> + struct bio_post_read_ctx *ctx =
>>> + container_of(work, struct bio_post_read_ctx, work);
>>> + struct bio *bio = ctx->bio;
>>> -static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
>>> -{
>>> - /*
>>> - * We use different work queues for decryption and for verity because
>>> - * verity may require reading metadata pages that need decryption, and
>>> - * we shouldn't recurse to the same workqueue.
>>> - */
>>> + if (ctx->enabled_steps & STEP_DECRYPT)
>>> + fscrypt_decrypt_bio(bio);
>>> - if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
>>> - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
>>> - INIT_WORK(&ctx->work, f2fs_post_read_work);
>>> - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
>>> - return;
>>> - }
>>> + if (ctx->enabled_steps & STEP_DECOMPRESS) {
>>> + struct bio_vec *bv;
>>> + struct bvec_iter_all iter_all;
>>> + bool all_compressed = true;
>>> - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
>>> - INIT_WORK(&ctx->work, f2fs_verity_work);
>>> - fsverity_enqueue_verify_work(&ctx->work);
>>> - return;
>>> - }
>>> + bio_for_each_segment_all(bv, bio, iter_all) {
>>> + struct page *page = bv->bv_page;
>>> + /* PG_error will be set if decryption failed. */
>>> + bool failed = PageError(page);
>>> - __f2fs_read_end_io(ctx->bio, false, false);
>>> -}
>>> + if (f2fs_is_compressed_page(page))
>>> + f2fs_end_read_compressed_page(page, failed);
>>> + else
>>> + all_compressed = false;
>>> + }
>>> + /*
>>> + * Optimization: if all the bio's pages are compressed, then
>>> + * scheduling the per-bio verity work is unnecessary, as verity
>>> + * will be fully handled at the compression cluster level.
>>> + */
>>> + if (all_compressed)
>>> + ctx->enabled_steps &= ~STEP_VERITY;
>>> + }
>>
>> Can we wrap above logic into a function for cleanup?
>
> Are you saying you want the STEP_DECOMPRESS handling in a new function, e.g.
Yes, IMO, this will make bio_post_read_processing() more clean.
Something like:
if (ctx->enabled_steps & STEP_DECOMPRESS) {
f2fs_decompress_bio();
/*
...
*/
if (all_compressed)
ctx->enabled_steps &= ~STEP_VERITY;
}
> f2fs_handle_step_decompress()? I could do that, though this new function would
> only be called from f2fs_post_read_work(), which isn't too long. So I'm not
At least one more function name could explain a little bit more about what is doing
there.
> sure it would be better.
>
>>> +/* Context for decompressing one cluster on the read IO path */
>>> struct decompress_io_ctx {
>>> u32 magic; /* magic number to indicate page is compressed */
>>> struct inode *inode; /* inode the context belong to */
>>> @@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
>>> struct compress_data *cbuf; /* virtual mapped address on cpages */
>>> size_t rlen; /* valid data length in rbuf */
>>> size_t clen; /* valid data length in cbuf */
>>> - atomic_t pending_pages; /* in-flight compressed page count */
>>> - atomic_t verity_pages; /* in-flight page count for verity */
>>> - bool failed; /* indicate IO error during decompression */
>>> + atomic_t remaining_pages; /* number of compressed pages remaining to be read */
>>> + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
>>
>> Now, we use .remaining_pages to control to trigger cluster decompression;
>> and .refcnt to control to release dic structure.
>>
>> How about adding a bit more description about above info for better
>> readability?
>
> Would you like longer comments even though every other field in this struct has
> a 1-line comment?
Yes, it's fine to me, f2fs has many samples like this.
>
>>> -void f2fs_free_dic(struct decompress_io_ctx *dic);
>>> -void f2fs_decompress_end_io(struct page **rpages,
>>> - unsigned int cluster_size, bool err, bool verity);
>>> +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
>>> +void f2fs_put_page_decompress_io_ctx(struct page *page);
>>> int f2fs_init_compress_ctx(struct compress_ctx *cc);
>>> void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
>>> void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
>>> @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
>>> }
>>> static inline int f2fs_init_compress_mempool(void) { return 0; }
>>> static inline void f2fs_destroy_compress_mempool(void) { }
>>> +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
>>> +{
>>> + WARN_ON_ONCE(1);
>>> +}
>>> +static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
>>
>> f2fs_put_page_in_dic() or f2fs_put_dic_page()?
>
> It's putting the decompression context of the page, not the page itself. So I
> feel the name I've proposed makes more sense.
Actually, my concern is the name looks a bit long, though it's not a big deal.
>
>>> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
>>> index 56b113e3cd6aa..9e2981733ea4a 100644
>>> --- a/include/trace/events/f2fs.h
>>> +++ b/include/trace/events/f2fs.h
>>> @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
>>> TP_ARGS(inode, cluster_idx, cluster_size, algtype)
>>> );
>>> -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
>>> +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
>>
>> I suggest keeping original tracepoint name, it can avoid breaking userspace
>> binary or script.
>>
>
> Tracepoints aren't a stable UAPI, and the new name is more logical because it
> describes what is being decompressed rather than an implementation detail of
> where the data is located (in pages).
Both ..decompress_cluster.. or ..decompress_pages.. look fine to me, so I don't
think it's necessary for the name change, however after the change, developers/users
may suffer its side-effect that they have to change their script/binary to enable
the f2fs tracepoint according to kernel version.
Though tracepoint isn't a stable UAPI, I still don't want to break it.
Thanks,
>
> - Eric
> .
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [f2fs-dev] [PATCH v2] f2fs: clean up post-read processing
@ 2021-01-05 1:26 ` Chao Yu
0 siblings, 0 replies; 12+ messages in thread
From: Chao Yu @ 2021-01-05 1:26 UTC (permalink / raw)
To: Eric Biggers; +Cc: linux-fscrypt, linux-f2fs-devel
On 2021/1/5 2:33, Eric Biggers wrote:
> On Mon, Jan 04, 2021 at 04:43:56PM +0800, Chao Yu wrote:
>> Hi Eric,
>>
>> On 2021/1/4 11:45, Eric Biggers wrote:
>>> That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
>>> it's actually needed.
>>
>> Yup, now I see.
>>
>> Some comments as below.
>>
>> On 2020/12/29 7:26, Eric Biggers wrote:
>>> From: Eric Biggers <ebiggers@google.com>
>>>
>>> Rework the post-read processing logic to be much easier to understand.
>>>
>>> At least one bug is fixed by this: if an I/O error occurred when reading
>>> from disk, decryption and verity would be performed on the uninitialized
>>> data, causing misleading messages in the kernel log.
>>>
>>> Signed-off-by: Eric Biggers <ebiggers@google.com>
>>> ---
>
> Please only quote the parts you're actually replying to.
>
>>> +static void f2fs_post_read_work(struct work_struct *work)
>>> {
>>> - queue_work(sbi->post_read_wq, work);
>>> -}
>>> + struct bio_post_read_ctx *ctx =
>>> + container_of(work, struct bio_post_read_ctx, work);
>>> + struct bio *bio = ctx->bio;
>>> -static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
>>> -{
>>> - /*
>>> - * We use different work queues for decryption and for verity because
>>> - * verity may require reading metadata pages that need decryption, and
>>> - * we shouldn't recurse to the same workqueue.
>>> - */
>>> + if (ctx->enabled_steps & STEP_DECRYPT)
>>> + fscrypt_decrypt_bio(bio);
>>> - if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
>>> - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
>>> - INIT_WORK(&ctx->work, f2fs_post_read_work);
>>> - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
>>> - return;
>>> - }
>>> + if (ctx->enabled_steps & STEP_DECOMPRESS) {
>>> + struct bio_vec *bv;
>>> + struct bvec_iter_all iter_all;
>>> + bool all_compressed = true;
>>> - if (ctx->enabled_steps & (1 << STEP_VERITY)) {
>>> - INIT_WORK(&ctx->work, f2fs_verity_work);
>>> - fsverity_enqueue_verify_work(&ctx->work);
>>> - return;
>>> - }
>>> + bio_for_each_segment_all(bv, bio, iter_all) {
>>> + struct page *page = bv->bv_page;
>>> + /* PG_error will be set if decryption failed. */
>>> + bool failed = PageError(page);
>>> - __f2fs_read_end_io(ctx->bio, false, false);
>>> -}
>>> + if (f2fs_is_compressed_page(page))
>>> + f2fs_end_read_compressed_page(page, failed);
>>> + else
>>> + all_compressed = false;
>>> + }
>>> + /*
>>> + * Optimization: if all the bio's pages are compressed, then
>>> + * scheduling the per-bio verity work is unnecessary, as verity
>>> + * will be fully handled at the compression cluster level.
>>> + */
>>> + if (all_compressed)
>>> + ctx->enabled_steps &= ~STEP_VERITY;
>>> + }
>>
>> Can we wrap above logic into a function for cleanup?
>
> Are you saying you want the STEP_DECOMPRESS handling in a new function, e.g.
Yes, IMO, this will make bio_post_read_processing() more clean.
Something like:
if (ctx->enabled_steps & STEP_DECOMPRESS) {
f2fs_decompress_bio();
/*
...
*/
if (all_compressed)
ctx->enabled_steps &= ~STEP_VERITY;
}
> f2fs_handle_step_decompress()? I could do that, though this new function would
> only be called from f2fs_post_read_work(), which isn't too long. So I'm not
At least one more function name could explain a little bit more about what is doing
there.
> sure it would be better.
>
>>> +/* Context for decompressing one cluster on the read IO path */
>>> struct decompress_io_ctx {
>>> u32 magic; /* magic number to indicate page is compressed */
>>> struct inode *inode; /* inode the context belong to */
>>> @@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
>>> struct compress_data *cbuf; /* virtual mapped address on cpages */
>>> size_t rlen; /* valid data length in rbuf */
>>> size_t clen; /* valid data length in cbuf */
>>> - atomic_t pending_pages; /* in-flight compressed page count */
>>> - atomic_t verity_pages; /* in-flight page count for verity */
>>> - bool failed; /* indicate IO error during decompression */
>>> + atomic_t remaining_pages; /* number of compressed pages remaining to be read */
>>> + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */
>>
>> Now, we use .remaining_pages to control to trigger cluster decompression;
>> and .refcnt to control to release dic structure.
>>
>> How about adding a bit more description about above info for better
>> readability?
>
> Would you like longer comments even though every other field in this struct has
> a 1-line comment?
Yes, it's fine to me, f2fs has many samples like this.
>
>>> -void f2fs_free_dic(struct decompress_io_ctx *dic);
>>> -void f2fs_decompress_end_io(struct page **rpages,
>>> - unsigned int cluster_size, bool err, bool verity);
>>> +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
>>> +void f2fs_put_page_decompress_io_ctx(struct page *page);
>>> int f2fs_init_compress_ctx(struct compress_ctx *cc);
>>> void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
>>> void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
>>> @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
>>> }
>>> static inline int f2fs_init_compress_mempool(void) { return 0; }
>>> static inline void f2fs_destroy_compress_mempool(void) { }
>>> +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
>>> +{
>>> + WARN_ON_ONCE(1);
>>> +}
>>> +static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
>>
>> f2fs_put_page_in_dic() or f2fs_put_dic_page()?
>
> It's putting the decompression context of the page, not the page itself. So I
> feel the name I've proposed makes more sense.
Actually, my concern is the name looks a bit long, though it's not a big deal.
>
>>> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
>>> index 56b113e3cd6aa..9e2981733ea4a 100644
>>> --- a/include/trace/events/f2fs.h
>>> +++ b/include/trace/events/f2fs.h
>>> @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
>>> TP_ARGS(inode, cluster_idx, cluster_size, algtype)
>>> );
>>> -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
>>> +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,
>>
>> I suggest keeping original tracepoint name, it can avoid breaking userspace
>> binary or script.
>>
>
> Tracepoints aren't a stable UAPI, and the new name is more logical because it
> describes what is being decompressed rather than an implementation detail of
> where the data is located (in pages).
Both ..decompress_cluster.. or ..decompress_pages.. look fine to me, so I don't
think it's necessary for the name change, however after the change, developers/users
may suffer its side-effect that they have to change their script/binary to enable
the f2fs tracepoint according to kernel version.
Though tracepoint isn't a stable UAPI, I still don't want to break it.
Thanks,
>
> - Eric
> .
>
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2021-01-05 1:27 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-28 23:26 [PATCH v2] f2fs: clean up post-read processing Eric Biggers
2020-12-28 23:26 ` [f2fs-dev] " Eric Biggers
2021-01-04 3:35 ` Chao Yu
2021-01-04 3:35 ` Chao Yu
2021-01-04 3:45 ` Eric Biggers
2021-01-04 3:45 ` Eric Biggers
2021-01-04 8:43 ` Chao Yu
2021-01-04 8:43 ` Chao Yu
2021-01-04 18:33 ` Eric Biggers
2021-01-04 18:33 ` Eric Biggers
2021-01-05 1:26 ` Chao Yu
2021-01-05 1:26 ` Chao Yu
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.