Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/6] Make block_read_full_page synchronous
@ 2020-10-22 21:22 Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 1/6] block: Add blk_completion Matthew Wilcox (Oracle)
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-22 21:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt
  Cc: Matthew Wilcox (Oracle)

This patchset does I/Os in larger chunks, if the blocks are adjacent.
More importantly, it returns the actual error from ->readpage for
filesystems which use block_read_full_page().  Unless fscrypt returns
an error, in which case it turns into EIO because it has to roundtrip
through bi_status.

I don't have a system with fscrypt enabled, so I'd appreciate some
testing from the fscrypt people.

Matthew Wilcox (Oracle) (6):
  block: Add blk_completion
  fs: Return error from block_read_full_page
  fs: Convert block_read_full_page to be synchronous
  fs: Hoist fscrypt decryption to bio completion handler
  fs: Turn decrypt_bh into decrypt_bio
  fs: Convert block_read_full_page to be synchronous with fscrypt
    enabled

 block/blk-core.c    |  61 +++++++++
 fs/buffer.c         | 304 ++++++++++++++++++++++----------------------
 include/linux/bio.h |  11 ++
 3 files changed, 226 insertions(+), 150 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/6] block: Add blk_completion
  2020-10-22 21:22 [PATCH 0/6] Make block_read_full_page synchronous Matthew Wilcox (Oracle)
@ 2020-10-22 21:22 ` Matthew Wilcox (Oracle)
  2020-10-27 18:30   ` Christoph Hellwig
  2020-10-22 21:22 ` [PATCH 2/6] fs: Return error from block_read_full_page Matthew Wilcox (Oracle)
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-22 21:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt
  Cc: Matthew Wilcox (Oracle)

This new data structure allows a task to wait for N things to complete.
Usually the submitting task will handle cleanup, but if it is killed,
the last completer will take care of it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 block/blk-core.c    | 61 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h | 11 ++++++++
 2 files changed, 72 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index 10c08ac50697..2892246f2176 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1900,6 +1900,67 @@ void blk_io_schedule(void)
 }
 EXPORT_SYMBOL_GPL(blk_io_schedule);
 
+void blk_completion_init(struct blk_completion *cmpl, int n)
+{
+	spin_lock_init(&cmpl->cmpl_lock);
+	cmpl->cmpl_count = n;
+	cmpl->cmpl_task = current;
+	cmpl->cmpl_status = BLK_STS_OK;
+}
+
+int blk_completion_sub(struct blk_completion *cmpl, blk_status_t status, int n)
+{
+	int ret = 0;
+
+	spin_lock_bh(&cmpl->cmpl_lock);
+	if (cmpl->cmpl_status == BLK_STS_OK && status != BLK_STS_OK)
+		cmpl->cmpl_status = status;
+	cmpl->cmpl_count -= n;
+	BUG_ON(cmpl->cmpl_count < 0);
+	if (cmpl->cmpl_count == 0) {
+		if (cmpl->cmpl_task)
+			wake_up_process(cmpl->cmpl_task);
+		else
+			ret = -EINTR;
+	}
+	spin_unlock_bh(&cmpl->cmpl_lock);
+	if (ret < 0)
+		kfree(cmpl);
+	return ret;
+}
+
+int blk_completion_wait_killable(struct blk_completion *cmpl)
+{
+	int err = 0;
+
+	for (;;) {
+		set_current_state(TASK_KILLABLE);
+		spin_lock_bh(&cmpl->cmpl_lock);
+		if (cmpl->cmpl_count == 0)
+			break;
+		spin_unlock_bh(&cmpl->cmpl_lock);
+		blk_io_schedule();
+		if (fatal_signal_pending(current)) {
+			spin_lock_bh(&cmpl->cmpl_lock);
+			cmpl->cmpl_task = NULL;
+			if (cmpl->cmpl_count != 0) {
+				spin_unlock_bh(&cmpl->cmpl_lock);
+				cmpl = NULL;
+			}
+			err = -ERESTARTSYS;
+			break;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+	if (cmpl) {
+		spin_unlock_bh(&cmpl->cmpl_lock);
+		err = blk_status_to_errno(cmpl->cmpl_status);
+		kfree(cmpl);
+	}
+
+	return err;
+}
+
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f254bc79bb3a..0bde05f5548c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -814,4 +814,15 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 		bio->bi_opf |= REQ_NOWAIT;
 }
 
+struct blk_completion {
+	struct task_struct *cmpl_task;
+	spinlock_t cmpl_lock;
+	int cmpl_count;
+	blk_status_t cmpl_status;
+};
+
+void blk_completion_init(struct blk_completion *, int n);
+int blk_completion_sub(struct blk_completion *, blk_status_t status, int n);
+int blk_completion_wait_killable(struct blk_completion *);
+
 #endif /* __LINUX_BIO_H */
-- 
2.28.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/6] fs: Return error from block_read_full_page
  2020-10-22 21:22 [PATCH 0/6] Make block_read_full_page synchronous Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 1/6] block: Add blk_completion Matthew Wilcox (Oracle)
@ 2020-10-22 21:22 ` Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 3/6] fs: Convert block_read_full_page to be synchronous Matthew Wilcox (Oracle)
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-22 21:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt
  Cc: Matthew Wilcox (Oracle)

If the filesystem returns an error from get_block, report it
instead of ineffectually setting PageError.  Don't bother starting
any I/Os in this case since they won't bring the page Uptodate.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/buffer.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 1d5337517dcd..1b0ba1d59966 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2262,7 +2262,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
 	unsigned int blocksize, bbits;
-	int nr, i;
+	int nr, i, err = 0;
 	int fully_mapped = 1;
 
 	head = create_page_buffers(page, inode, 0);
@@ -2280,19 +2280,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 			continue;
 
 		if (!buffer_mapped(bh)) {
-			int err = 0;
-
 			fully_mapped = 0;
 			if (iblock < lblock) {
 				WARN_ON(bh->b_size != blocksize);
 				err = get_block(inode, iblock, bh, 0);
 				if (err)
-					SetPageError(page);
+					break;
 			}
 			if (!buffer_mapped(bh)) {
 				zero_user(page, i * blocksize, blocksize);
-				if (!err)
-					set_buffer_uptodate(bh);
+				set_buffer_uptodate(bh);
 				continue;
 			}
 			/*
@@ -2305,18 +2302,17 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 		arr[nr++] = bh;
 	} while (i++, iblock++, (bh = bh->b_this_page) != head);
 
+	if (err) {
+		unlock_page(page);
+		return err;
+	}
 	if (fully_mapped)
 		SetPageMappedToDisk(page);
 
 	if (!nr) {
-		/*
-		 * All buffers are uptodate - we can set the page uptodate
-		 * as well. But not if get_block() returned an error.
-		 */
-		if (!PageError(page))
-			SetPageUptodate(page);
-		unlock_page(page);
-		return 0;
+		/* All buffers are uptodate - we can set the page uptodate */
+		SetPageUptodate(page);
+		return AOP_UPDATED_PAGE;
 	}
 
 	/* Stage two: lock the buffers */
-- 
2.28.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/6] fs: Convert block_read_full_page to be synchronous
  2020-10-22 21:22 [PATCH 0/6] Make block_read_full_page synchronous Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 1/6] block: Add blk_completion Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 2/6] fs: Return error from block_read_full_page Matthew Wilcox (Oracle)
@ 2020-10-22 21:22 ` Matthew Wilcox (Oracle)
  2020-10-22 23:35   ` Eric Biggers
  2020-10-22 23:40   ` Eric Biggers
  2020-10-22 21:22 ` [PATCH 4/6] fs: Hoist fscrypt decryption to bio completion handler Matthew Wilcox (Oracle)
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 13+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-22 21:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt
  Cc: Matthew Wilcox (Oracle)

Use the new blk_completion infrastructure to wait for multiple I/Os.
Also coalesce adjacent buffer heads into a single BIO instead of
submitting one BIO per buffer head.  This doesn't work for fscrypt yet,
so keep the old code around for now.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/buffer.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/fs/buffer.c b/fs/buffer.c
index 1b0ba1d59966..ccb90081117c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2249,6 +2249,87 @@ int block_is_partially_uptodate(struct page *page, unsigned long from,
 }
 EXPORT_SYMBOL(block_is_partially_uptodate);
 
+static void readpage_end_bio(struct bio *bio)
+{
+	struct bio_vec *bvec;
+	struct page *page;
+	struct buffer_head *bh;
+	int i, nr = 0;
+
+	bio_for_each_bvec_all(bvec, bio, i) {
+		size_t offset = 0;
+		size_t max = bvec->bv_offset + bvec->bv_len;
+
+		page = bvec->bv_page;
+		bh = page_buffers(page);
+
+		for (offset = 0; offset < max; offset += bh->b_size,
+				bh = bh->b_this_page) {
+			if (offset < bvec->bv_offset)
+				continue;
+			BUG_ON(bh_offset(bh) != offset);
+			nr++;
+			if (unlikely(bio_flagged(bio, BIO_QUIET)))
+				set_bit(BH_Quiet, &bh->b_state);
+			if (bio->bi_status == BLK_STS_OK)
+				set_buffer_uptodate(bh);
+			else
+				buffer_io_error(bh, ", async page read");
+			unlock_buffer(bh);
+		}
+	}
+
+	if (blk_completion_sub(bio->bi_private, bio->bi_status, nr) < 0)
+		unlock_page(page);
+	bio_put(bio);
+}
+
+static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
+		unsigned int nr, struct buffer_head **bhs)
+{
+	struct bio *bio = NULL;
+	unsigned int i;
+	int err;
+
+	blk_completion_init(cmpl, nr);
+
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+		sector_t sector = bh->b_blocknr * (bh->b_size >> 9);
+		bool same_page;
+
+		if (buffer_uptodate(bh)) {
+			end_buffer_async_read(bh, 1);
+			blk_completion_sub(cmpl, BLK_STS_OK, 1);
+			continue;
+		}
+		if (bio) {
+			if (bio_end_sector(bio) == sector &&
+			    __bio_try_merge_page(bio, bh->b_page, bh->b_size,
+					bh_offset(bh), &same_page))
+				continue;
+			submit_bio(bio);
+		}
+		bio = bio_alloc(GFP_NOIO, 1);
+		bio_set_dev(bio, bh->b_bdev);
+		bio->bi_iter.bi_sector = sector;
+		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+		bio->bi_end_io = readpage_end_bio;
+		bio->bi_private = cmpl;
+		/* Take care of bh's that straddle the end of the device */
+		guard_bio_eod(bio);
+	}
+
+	if (bio)
+		submit_bio(bio);
+
+	err = blk_completion_wait_killable(cmpl);
+	if (!err)
+		return AOP_UPDATED_PAGE;
+	unlock_page(page);
+	return err;
+}
+
 /*
  * Generic "read page" function for block devices that have the normal
  * get_block functionality. This is most of the block device filesystems.
@@ -2258,6 +2339,7 @@ EXPORT_SYMBOL(block_is_partially_uptodate);
  */
 int block_read_full_page(struct page *page, get_block_t *get_block)
 {
+	struct blk_completion *cmpl = kmalloc(sizeof(*cmpl), GFP_NOIO);
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
@@ -2265,6 +2347,9 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	int nr, i, err = 0;
 	int fully_mapped = 1;
 
+	if (!cmpl)
+		return -ENOMEM;
+
 	head = create_page_buffers(page, inode, 0);
 	blocksize = head->b_size;
 	bbits = block_size_bits(blocksize);
@@ -2303,6 +2388,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	} while (i++, iblock++, (bh = bh->b_this_page) != head);
 
 	if (err) {
+		kfree(cmpl);
 		unlock_page(page);
 		return err;
 	}
@@ -2322,6 +2408,10 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 		mark_buffer_async_read(bh);
 	}
 
+	if (!fscrypt_inode_uses_fs_layer_crypto(inode))
+		return readpage_submit_bhs(page, cmpl, nr, arr);
+	kfree(cmpl);
+
 	/*
 	 * Stage 3: start the IO.  Check for uptodateness
 	 * inside the buffer lock in case another process reading
-- 
2.28.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 4/6] fs: Hoist fscrypt decryption to bio completion handler
  2020-10-22 21:22 [PATCH 0/6] Make block_read_full_page synchronous Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2020-10-22 21:22 ` [PATCH 3/6] fs: Convert block_read_full_page to be synchronous Matthew Wilcox (Oracle)
@ 2020-10-22 21:22 ` Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 5/6] fs: Turn decrypt_bh into decrypt_bio Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 6/6] fs: Convert block_read_full_page to be synchronous with fscrypt enabled Matthew Wilcox (Oracle)
  5 siblings, 0 replies; 13+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-22 21:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt
  Cc: Matthew Wilcox (Oracle)

This is prep work for doing decryption at the BIO level instead of
the BH level.  It still works on one BH at a time for now.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/buffer.c | 45 +++++++++++++++++++++------------------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index ccb90081117c..627ae1d853c0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -241,6 +241,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	return ret;
 }
 
+/*
+ * I/O completion handler for block_read_full_page() - pages
+ * which come unlocked at the end of I/O.
+ */
 static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 {
 	unsigned long flags;
@@ -313,28 +317,6 @@ static void decrypt_bh(struct work_struct *work)
 	kfree(ctx);
 }
 
-/*
- * I/O completion handler for block_read_full_page() - pages
- * which come unlocked at the end of I/O.
- */
-static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
-{
-	/* Decrypt if needed */
-	if (uptodate &&
-	    fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
-		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
-
-		if (ctx) {
-			INIT_WORK(&ctx->work, decrypt_bh);
-			ctx->bh = bh;
-			fscrypt_enqueue_decrypt_work(&ctx->work);
-			return;
-		}
-		uptodate = 0;
-	}
-	end_buffer_async_read(bh, uptodate);
-}
-
 /*
  * Completion handler for block_write_full_page() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
@@ -404,7 +386,7 @@ EXPORT_SYMBOL(end_buffer_async_write);
  */
 static void mark_buffer_async_read(struct buffer_head *bh)
 {
-	bh->b_end_io = end_buffer_async_read_io;
+	bh->b_end_io = end_buffer_async_read;
 	set_buffer_async_read(bh);
 }
 
@@ -3103,11 +3085,26 @@ EXPORT_SYMBOL(generic_block_bmap);
 static void end_bio_bh_io_sync(struct bio *bio)
 {
 	struct buffer_head *bh = bio->bi_private;
+	int uptodate = !bio->bi_status;
 
 	if (unlikely(bio_flagged(bio, BIO_QUIET)))
 		set_bit(BH_Quiet, &bh->b_state);
 
-	bh->b_end_io(bh, !bio->bi_status);
+	/* Decrypt if needed */
+	if ((bio_data_dir(bio) == READ) && uptodate &&
+	    fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
+		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+
+		if (ctx) {
+			INIT_WORK(&ctx->work, decrypt_bh);
+			ctx->bh = bh;
+			fscrypt_enqueue_decrypt_work(&ctx->work);
+			bio_put(bio);
+			return;
+		}
+		uptodate = 0;
+	}
+	bh->b_end_io(bh, uptodate);
 	bio_put(bio);
 }
 
-- 
2.28.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 5/6] fs: Turn decrypt_bh into decrypt_bio
  2020-10-22 21:22 [PATCH 0/6] Make block_read_full_page synchronous Matthew Wilcox (Oracle)
                   ` (3 preceding siblings ...)
  2020-10-22 21:22 ` [PATCH 4/6] fs: Hoist fscrypt decryption to bio completion handler Matthew Wilcox (Oracle)
@ 2020-10-22 21:22 ` Matthew Wilcox (Oracle)
  2020-10-22 21:22 ` [PATCH 6/6] fs: Convert block_read_full_page to be synchronous with fscrypt enabled Matthew Wilcox (Oracle)
  5 siblings, 0 replies; 13+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-22 21:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt
  Cc: Matthew Wilcox (Oracle)

Pass a bio to decrypt_bio instead of a buffer_head to decrypt_bh.
Another step towards doing decryption per-BIO instead of per-BH.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/buffer.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 627ae1d853c0..f859e0929b7e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -299,22 +299,24 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	return;
 }
 
-struct decrypt_bh_ctx {
+struct decrypt_bio_ctx {
 	struct work_struct work;
-	struct buffer_head *bh;
+	struct bio *bio;
 };
 
-static void decrypt_bh(struct work_struct *work)
+static void decrypt_bio(struct work_struct *work)
 {
-	struct decrypt_bh_ctx *ctx =
-		container_of(work, struct decrypt_bh_ctx, work);
-	struct buffer_head *bh = ctx->bh;
+	struct decrypt_bio_ctx *ctx =
+		container_of(work, struct decrypt_bio_ctx, work);
+	struct bio *bio = ctx->bio;
+	struct buffer_head *bh = bio->bi_private;
 	int err;
 
 	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
 					       bh_offset(bh));
 	end_buffer_async_read(bh, err == 0);
 	kfree(ctx);
+	bio_put(bio);
 }
 
 /*
@@ -3093,13 +3095,12 @@ static void end_bio_bh_io_sync(struct bio *bio)
 	/* Decrypt if needed */
 	if ((bio_data_dir(bio) == READ) && uptodate &&
 	    fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
-		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+		struct decrypt_bio_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
 
 		if (ctx) {
-			INIT_WORK(&ctx->work, decrypt_bh);
-			ctx->bh = bh;
+			INIT_WORK(&ctx->work, decrypt_bio);
+			ctx->bio = bio;
 			fscrypt_enqueue_decrypt_work(&ctx->work);
-			bio_put(bio);
 			return;
 		}
 		uptodate = 0;
-- 
2.28.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 6/6] fs: Convert block_read_full_page to be synchronous with fscrypt enabled
  2020-10-22 21:22 [PATCH 0/6] Make block_read_full_page synchronous Matthew Wilcox (Oracle)
                   ` (4 preceding siblings ...)
  2020-10-22 21:22 ` [PATCH 5/6] fs: Turn decrypt_bh into decrypt_bio Matthew Wilcox (Oracle)
@ 2020-10-22 21:22 ` Matthew Wilcox (Oracle)
  5 siblings, 0 replies; 13+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-10-22 21:22 UTC (permalink / raw)
  To: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt
  Cc: Matthew Wilcox (Oracle)

Use the new decrypt_end_bio() instead of readpage_end_bio() if
fscrypt needs to be used.  Remove the old end_buffer_async_read()
now that all BHs go through readpage_end_bio().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/buffer.c | 198 ++++++++++++++++------------------------------------
 1 file changed, 59 insertions(+), 139 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index f859e0929b7e..62c74f0102d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -241,84 +241,6 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	return ret;
 }
 
-/*
- * I/O completion handler for block_read_full_page() - pages
- * which come unlocked at the end of I/O.
- */
-static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
-{
-	unsigned long flags;
-	struct buffer_head *first;
-	struct buffer_head *tmp;
-	struct page *page;
-	int page_uptodate = 1;
-
-	BUG_ON(!buffer_async_read(bh));
-
-	page = bh->b_page;
-	if (uptodate) {
-		set_buffer_uptodate(bh);
-	} else {
-		clear_buffer_uptodate(bh);
-		buffer_io_error(bh, ", async page read");
-		SetPageError(page);
-	}
-
-	/*
-	 * Be _very_ careful from here on. Bad things can happen if
-	 * two buffer heads end IO at almost the same time and both
-	 * decide that the page is now completely done.
-	 */
-	first = page_buffers(page);
-	spin_lock_irqsave(&first->b_uptodate_lock, flags);
-	clear_buffer_async_read(bh);
-	unlock_buffer(bh);
-	tmp = bh;
-	do {
-		if (!buffer_uptodate(tmp))
-			page_uptodate = 0;
-		if (buffer_async_read(tmp)) {
-			BUG_ON(!buffer_locked(tmp));
-			goto still_busy;
-		}
-		tmp = tmp->b_this_page;
-	} while (tmp != bh);
-	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
-
-	/*
-	 * If none of the buffers had errors and they are all
-	 * uptodate then we can set the page uptodate.
-	 */
-	if (page_uptodate && !PageError(page))
-		SetPageUptodate(page);
-	unlock_page(page);
-	return;
-
-still_busy:
-	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
-	return;
-}
-
-struct decrypt_bio_ctx {
-	struct work_struct work;
-	struct bio *bio;
-};
-
-static void decrypt_bio(struct work_struct *work)
-{
-	struct decrypt_bio_ctx *ctx =
-		container_of(work, struct decrypt_bio_ctx, work);
-	struct bio *bio = ctx->bio;
-	struct buffer_head *bh = bio->bi_private;
-	int err;
-
-	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
-					       bh_offset(bh));
-	end_buffer_async_read(bh, err == 0);
-	kfree(ctx);
-	bio_put(bio);
-}
-
 /*
  * Completion handler for block_write_full_page() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
@@ -365,33 +287,6 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 }
 EXPORT_SYMBOL(end_buffer_async_write);
 
-/*
- * If a page's buffers are under async readin (end_buffer_async_read
- * completion) then there is a possibility that another thread of
- * control could lock one of the buffers after it has completed
- * but while some of the other buffers have not completed.  This
- * locked buffer would confuse end_buffer_async_read() into not unlocking
- * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
- * that this buffer is not under async I/O.
- *
- * The page comes unlocked when it has no locked buffer_async buffers
- * left.
- *
- * PageLocked prevents anyone starting new async I/O reads any of
- * the buffers.
- *
- * PageWriteback is used to prevent simultaneous writeout of the same
- * page.
- *
- * PageLocked prevents anyone from starting writeback of a page which is
- * under read I/O (PageWriteback is only ever set against a locked page).
- */
-static void mark_buffer_async_read(struct buffer_head *bh)
-{
-	bh->b_end_io = end_buffer_async_read;
-	set_buffer_async_read(bh);
-}
-
 static void mark_buffer_async_write_endio(struct buffer_head *bh,
 					  bh_end_io_t *handler)
 {
@@ -2268,8 +2163,54 @@ static void readpage_end_bio(struct bio *bio)
 	bio_put(bio);
 }
 
+struct decrypt_bio_ctx {
+	struct work_struct work;
+	struct bio *bio;
+};
+
+static void decrypt_bio(struct work_struct *work)
+{
+	struct decrypt_bio_ctx *ctx =
+		container_of(work, struct decrypt_bio_ctx, work);
+	struct bio *bio = ctx->bio;
+	struct bio_vec *bvec;
+	int i, err = 0;
+
+	kfree(ctx);
+	bio_for_each_bvec_all(bvec, bio, i) {
+		err = fscrypt_decrypt_pagecache_blocks(bvec->bv_page,
+				bvec->bv_len, bvec->bv_offset);
+		if (err)
+			break;
+	}
+
+	/* XXX: Should report a better error here */
+	if (err)
+		bio->bi_status = BLK_STS_IOERR;
+	readpage_end_bio(bio);
+}
+
+static void decrypt_end_bio(struct bio *bio)
+{
+	struct decrypt_bio_ctx *ctx = NULL;
+
+	if (bio->bi_status == BLK_STS_OK) {
+		ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+		if (!ctx)
+			bio->bi_status = BLK_STS_RESOURCE;
+	}
+
+	if (ctx) {
+		INIT_WORK(&ctx->work, decrypt_bio);
+		ctx->bio = bio;
+		fscrypt_enqueue_decrypt_work(&ctx->work);
+	} else {
+		readpage_end_bio(bio);
+	}
+}
+
 static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
-		unsigned int nr, struct buffer_head **bhs)
+		unsigned int nr, struct buffer_head **bhs, bio_end_io_t end_bio)
 {
 	struct bio *bio = NULL;
 	unsigned int i;
@@ -2283,7 +2224,8 @@ static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
 		bool same_page;
 
 		if (buffer_uptodate(bh)) {
-			end_buffer_async_read(bh, 1);
+			clear_buffer_async_read(bh);
+			unlock_buffer(bh);
 			blk_completion_sub(cmpl, BLK_STS_OK, 1);
 			continue;
 		}
@@ -2298,7 +2240,7 @@ static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
 		bio_set_dev(bio, bh->b_bdev);
 		bio->bi_iter.bi_sector = sector;
 		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-		bio->bi_end_io = readpage_end_bio;
+		bio->bi_end_io = end_bio;
 		bio->bi_private = cmpl;
 		/* Take care of bh's that straddle the end of the device */
 		guard_bio_eod(bio);
@@ -2314,6 +2256,13 @@ static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
 	return err;
 }
 
+static bio_end_io_t *fscrypt_end_io(struct inode *inode)
+{
+	if (fscrypt_inode_uses_fs_layer_crypto(inode))
+		return decrypt_end_bio;
+	return readpage_end_bio;
+}
+
 /*
  * Generic "read page" function for block devices that have the normal
  * get_block functionality. This is most of the block device filesystems.
@@ -2389,26 +2338,10 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	for (i = 0; i < nr; i++) {
 		bh = arr[i];
 		lock_buffer(bh);
-		mark_buffer_async_read(bh);
+		set_buffer_async_read(bh);
 	}
 
-	if (!fscrypt_inode_uses_fs_layer_crypto(inode))
-		return readpage_submit_bhs(page, cmpl, nr, arr);
-	kfree(cmpl);
-
-	/*
-	 * Stage 3: start the IO.  Check for uptodateness
-	 * inside the buffer lock in case another process reading
-	 * the underlying blockdev brought it uptodate (the sct fix).
-	 */
-	for (i = 0; i < nr; i++) {
-		bh = arr[i];
-		if (buffer_uptodate(bh))
-			end_buffer_async_read(bh, 1);
-		else
-			submit_bh(REQ_OP_READ, 0, bh);
-	}
-	return 0;
+	return readpage_submit_bhs(page, cmpl, nr, arr, fscrypt_end_io(inode));
 }
 EXPORT_SYMBOL(block_read_full_page);
 
@@ -3092,19 +3025,6 @@ static void end_bio_bh_io_sync(struct bio *bio)
 	if (unlikely(bio_flagged(bio, BIO_QUIET)))
 		set_bit(BH_Quiet, &bh->b_state);
 
-	/* Decrypt if needed */
-	if ((bio_data_dir(bio) == READ) && uptodate &&
-	    fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
-		struct decrypt_bio_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
-
-		if (ctx) {
-			INIT_WORK(&ctx->work, decrypt_bio);
-			ctx->bio = bio;
-			fscrypt_enqueue_decrypt_work(&ctx->work);
-			return;
-		}
-		uptodate = 0;
-	}
 	bh->b_end_io(bh, uptodate);
 	bio_put(bio);
 }
-- 
2.28.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] fs: Convert block_read_full_page to be synchronous
  2020-10-22 21:22 ` [PATCH 3/6] fs: Convert block_read_full_page to be synchronous Matthew Wilcox (Oracle)
@ 2020-10-22 23:35   ` Eric Biggers
  2020-10-22 23:40   ` Eric Biggers
  1 sibling, 0 replies; 13+ messages in thread
From: Eric Biggers @ 2020-10-22 23:35 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt

On Thu, Oct 22, 2020 at 10:22:25PM +0100, Matthew Wilcox (Oracle) wrote:
> Use the new blk_completion infrastructure to wait for multiple I/Os.
> Also coalesce adjacent buffer heads into a single BIO instead of
> submitting one BIO per buffer head.  This doesn't work for fscrypt yet,
> so keep the old code around for now.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/buffer.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 90 insertions(+)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 1b0ba1d59966..ccb90081117c 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -2249,6 +2249,87 @@ int block_is_partially_uptodate(struct page *page, unsigned long from,
>  }
>  EXPORT_SYMBOL(block_is_partially_uptodate);
>  
> +static void readpage_end_bio(struct bio *bio)
> +{
> +	struct bio_vec *bvec;
> +	struct page *page;
> +	struct buffer_head *bh;
> +	int i, nr = 0;
> +
> +	bio_for_each_bvec_all(bvec, bio, i) {

Shouldn't this technically be bio_for_each_segment_all()?  This wants to iterate
over the pages, not the bvecs -- and in general, each bvec might contain
multiple pages.

Now, in this case, each bio has only 1 page and 1 bvec, so it doesn't really
matter.  But if we're going to use an iterator, it seems we should use the right
kind.

Likewise in decrypt_bio() in patch 6.

- Eric

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] fs: Convert block_read_full_page to be synchronous
  2020-10-22 21:22 ` [PATCH 3/6] fs: Convert block_read_full_page to be synchronous Matthew Wilcox (Oracle)
  2020-10-22 23:35   ` Eric Biggers
@ 2020-10-22 23:40   ` Eric Biggers
  2020-10-23 13:21     ` Matthew Wilcox
  1 sibling, 1 reply; 13+ messages in thread
From: Eric Biggers @ 2020-10-22 23:40 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt

On Thu, Oct 22, 2020 at 10:22:25PM +0100, Matthew Wilcox (Oracle) wrote:
> +static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
> +		unsigned int nr, struct buffer_head **bhs)
> +{
> +	struct bio *bio = NULL;
> +	unsigned int i;
> +	int err;
> +
> +	blk_completion_init(cmpl, nr);
> +
> +	for (i = 0; i < nr; i++) {
> +		struct buffer_head *bh = bhs[i];
> +		sector_t sector = bh->b_blocknr * (bh->b_size >> 9);
> +		bool same_page;
> +
> +		if (buffer_uptodate(bh)) {
> +			end_buffer_async_read(bh, 1);
> +			blk_completion_sub(cmpl, BLK_STS_OK, 1);
> +			continue;
> +		}
> +		if (bio) {
> +			if (bio_end_sector(bio) == sector &&
> +			    __bio_try_merge_page(bio, bh->b_page, bh->b_size,
> +					bh_offset(bh), &same_page))
> +				continue;
> +			submit_bio(bio);
> +		}
> +		bio = bio_alloc(GFP_NOIO, 1);
> +		bio_set_dev(bio, bh->b_bdev);
> +		bio->bi_iter.bi_sector = sector;
> +		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> +		bio->bi_end_io = readpage_end_bio;
> +		bio->bi_private = cmpl;
> +		/* Take care of bh's that straddle the end of the device */
> +		guard_bio_eod(bio);
> +	}

The following is needed to set the bio encryption context for the
'-o inlinecrypt' case on ext4:

diff --git a/fs/buffer.c b/fs/buffer.c
index 95c338e2b99c..546a08c5003b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2237,6 +2237,7 @@ static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
 			submit_bio(bio);
 		}
 		bio = bio_alloc(GFP_NOIO, 1);
+		fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
 		bio_set_dev(bio, bh->b_bdev);
 		bio->bi_iter.bi_sector = sector;
 		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] fs: Convert block_read_full_page to be synchronous
  2020-10-22 23:40   ` Eric Biggers
@ 2020-10-23 13:21     ` Matthew Wilcox
  2020-10-23 16:13       ` Eric Biggers
  0 siblings, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2020-10-23 13:21 UTC (permalink / raw)
  To: Eric Biggers; +Cc: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt

On Thu, Oct 22, 2020 at 04:40:11PM -0700, Eric Biggers wrote:
> On Thu, Oct 22, 2020 at 10:22:25PM +0100, Matthew Wilcox (Oracle) wrote:
> > +static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
> > +		unsigned int nr, struct buffer_head **bhs)
> > +{
> > +	struct bio *bio = NULL;
> > +	unsigned int i;
> > +	int err;
> > +
> > +	blk_completion_init(cmpl, nr);
> > +
> > +	for (i = 0; i < nr; i++) {
> > +		struct buffer_head *bh = bhs[i];
> > +		sector_t sector = bh->b_blocknr * (bh->b_size >> 9);
> > +		bool same_page;
> > +
> > +		if (buffer_uptodate(bh)) {
> > +			end_buffer_async_read(bh, 1);
> > +			blk_completion_sub(cmpl, BLK_STS_OK, 1);
> > +			continue;
> > +		}
> > +		if (bio) {
> > +			if (bio_end_sector(bio) == sector &&
> > +			    __bio_try_merge_page(bio, bh->b_page, bh->b_size,
> > +					bh_offset(bh), &same_page))
> > +				continue;
> > +			submit_bio(bio);
> > +		}
> > +		bio = bio_alloc(GFP_NOIO, 1);
> > +		bio_set_dev(bio, bh->b_bdev);
> > +		bio->bi_iter.bi_sector = sector;
> > +		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> > +		bio->bi_end_io = readpage_end_bio;
> > +		bio->bi_private = cmpl;
> > +		/* Take care of bh's that straddle the end of the device */
> > +		guard_bio_eod(bio);
> > +	}
> 
> The following is needed to set the bio encryption context for the
> '-o inlinecrypt' case on ext4:
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 95c338e2b99c..546a08c5003b 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -2237,6 +2237,7 @@ static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
>  			submit_bio(bio);
>  		}
>  		bio = bio_alloc(GFP_NOIO, 1);
> +		fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
>  		bio_set_dev(bio, bh->b_bdev);
>  		bio->bi_iter.bi_sector = sector;
>  		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));

Thanks!  I saw that and had every intention of copying it across.
And then I forgot.  I'll add that.  I'm also going to do:

-                           __bio_try_merge_page(bio, bh->b_page, bh->b_size,
-                                       bh_offset(bh), &same_page))
+                           bio_add_page(bio, bh->b_page, bh->b_size,
+                                       bh_offset(bh)))

I wonder about allocating bios that can accommodate more bvecs.  Not sure
how often filesystems have adjacent blocks which go into non-adjacent
sub-page blocks.  It's certainly possible that a filesystem might have
a page consisting of DDhhDDDD ('D' for Data, 'h' for hole), but how
likely is it to have written the two data chunks next to each other?
Maybe with O_SYNC?

Anyway, this patchset needs some more thought because I've just seen
the path from mpage_readahead() to block_read_full_page() that should
definitely not be synchronous.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] fs: Convert block_read_full_page to be synchronous
  2020-10-23 13:21     ` Matthew Wilcox
@ 2020-10-23 16:13       ` Eric Biggers
  2020-10-23 20:42         ` Matthew Wilcox
  0 siblings, 1 reply; 13+ messages in thread
From: Eric Biggers @ 2020-10-23 16:13 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt

On Fri, Oct 23, 2020 at 02:21:38PM +0100, Matthew Wilcox wrote:
> > 
> > The following is needed to set the bio encryption context for the
> > '-o inlinecrypt' case on ext4:
> > 
> > diff --git a/fs/buffer.c b/fs/buffer.c
> > index 95c338e2b99c..546a08c5003b 100644
> > --- a/fs/buffer.c
> > +++ b/fs/buffer.c
> > @@ -2237,6 +2237,7 @@ static int readpage_submit_bhs(struct page *page, struct blk_completion *cmpl,
> >  			submit_bio(bio);
> >  		}
> >  		bio = bio_alloc(GFP_NOIO, 1);
> > +		fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
> >  		bio_set_dev(bio, bh->b_bdev);
> >  		bio->bi_iter.bi_sector = sector;
> >  		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> 
> Thanks!  I saw that and had every intention of copying it across.
> And then I forgot.  I'll add that.  I'm also going to do:
> 
> -                           __bio_try_merge_page(bio, bh->b_page, bh->b_size,
> -                                       bh_offset(bh), &same_page))
> +                           bio_add_page(bio, bh->b_page, bh->b_size,
> +                                       bh_offset(bh)))
> 
> I wonder about allocating bios that can accommodate more bvecs.  Not sure
> how often filesystems have adjacent blocks which go into non-adjacent
> sub-page blocks.  It's certainly possible that a filesystem might have
> a page consisting of DDhhDDDD ('D' for Data, 'h' for hole), but how
> likely is it to have written the two data chunks next to each other?
> Maybe with O_SYNC?
> 

I think that's a rare case that's not very important to optimize.  And there's
already a lot of code where filesystems *could* submit a single bio in that case
but don't.  For example, both fs/direct-io.c and fs/iomap/direct-io.c only
submit bios that contain logically contiguous data.

If you do implement this optimization, note that it wouldn't work when a
bio_crypt_ctx is set, since the data must be logically contiguous in that case.
To handle that you'd need to call fscrypt_mergeable_bio_bh() when adding each
block, and submit the bio if it returns false.  (In contrast, with your current
proposal, calling fscrypt_mergeable_bio_bh() isn't necessary because each bio
only contains logically contiguous data within one page.)

- Eric

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] fs: Convert block_read_full_page to be synchronous
  2020-10-23 16:13       ` Eric Biggers
@ 2020-10-23 20:42         ` Matthew Wilcox
  0 siblings, 0 replies; 13+ messages in thread
From: Matthew Wilcox @ 2020-10-23 20:42 UTC (permalink / raw)
  To: Eric Biggers; +Cc: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt

On Fri, Oct 23, 2020 at 09:13:35AM -0700, Eric Biggers wrote:
> On Fri, Oct 23, 2020 at 02:21:38PM +0100, Matthew Wilcox wrote:
> > I wonder about allocating bios that can accommodate more bvecs.  Not sure
> > how often filesystems have adjacent blocks which go into non-adjacent
> > sub-page blocks.  It's certainly possible that a filesystem might have
> > a page consisting of DDhhDDDD ('D' for Data, 'h' for hole), but how
> > likely is it to have written the two data chunks next to each other?
> > Maybe with O_SYNC?
> 
> I think that's a rare case that's not very important to optimize.  And there's
> already a lot of code where filesystems *could* submit a single bio in that case
> but don't.  For example, both fs/direct-io.c and fs/iomap/direct-io.c only
> submit bios that contain logically contiguous data.

True.  iomap/buffered-io.c will do it though.

> If you do implement this optimization, note that it wouldn't work when a
> bio_crypt_ctx is set, since the data must be logically contiguous in that case.
> To handle that you'd need to call fscrypt_mergeable_bio_bh() when adding each
> block, and submit the bio if it returns false.  (In contrast, with your current
> proposal, calling fscrypt_mergeable_bio_bh() isn't necessary because each bio
> only contains logically contiguous data within one page.)

Oh, that's disappointing.  I had assumed that you'd set up the dun for
the logical block corresponding to the start of the page and then you'd
be able to decrypt any range in the page.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/6] block: Add blk_completion
  2020-10-22 21:22 ` [PATCH 1/6] block: Add blk_completion Matthew Wilcox (Oracle)
@ 2020-10-27 18:30   ` Christoph Hellwig
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2020-10-27 18:30 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-fsdevel, linux-kernel, linux-block, linux-fscrypt

On Thu, Oct 22, 2020 at 10:22:23PM +0100, Matthew Wilcox (Oracle) wrote:
> This new data structure allows a task to wait for N things to complete.
> Usually the submitting task will handle cleanup, but if it is killed,
> the last completer will take care of it.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  block/blk-core.c    | 61 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/bio.h | 11 ++++++++
>  2 files changed, 72 insertions(+)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 10c08ac50697..2892246f2176 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1900,6 +1900,67 @@ void blk_io_schedule(void)
>  }
>  EXPORT_SYMBOL_GPL(blk_io_schedule);
>  
> +void blk_completion_init(struct blk_completion *cmpl, int n)
> +{
> +	spin_lock_init(&cmpl->cmpl_lock);
> +	cmpl->cmpl_count = n;
> +	cmpl->cmpl_task = current;
> +	cmpl->cmpl_status = BLK_STS_OK;
> +}
> +
> +int blk_completion_sub(struct blk_completion *cmpl, blk_status_t status, int n)

This needs documentation.  e.g. to explain what 'n' is.

> +int blk_completion_wait_killable(struct blk_completion *cmpl)
> +{
> +	int err = 0;
> +
> +	for (;;) {
> +		set_current_state(TASK_KILLABLE);
> +		spin_lock_bh(&cmpl->cmpl_lock);
> +		if (cmpl->cmpl_count == 0)
> +			break;
> +		spin_unlock_bh(&cmpl->cmpl_lock);
> +		blk_io_schedule();
> +		if (fatal_signal_pending(current)) {
> +			spin_lock_bh(&cmpl->cmpl_lock);
> +			cmpl->cmpl_task = NULL;
> +			if (cmpl->cmpl_count != 0) {
> +				spin_unlock_bh(&cmpl->cmpl_lock);
> +				cmpl = NULL;
> +			}
> +			err = -ERESTARTSYS;
> +			break;
> +		}
> +	}
> +	set_current_state(TASK_RUNNING);
> +	if (cmpl) {
> +		spin_unlock_bh(&cmpl->cmpl_lock);
> +		err = blk_status_to_errno(cmpl->cmpl_status);
> +		kfree(cmpl);
> +	}
> +
> +	return err;
> +}

What are the life time rules for cmpl?  Who frees it in the case
of a fatal signal?

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, back to index

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-22 21:22 [PATCH 0/6] Make block_read_full_page synchronous Matthew Wilcox (Oracle)
2020-10-22 21:22 ` [PATCH 1/6] block: Add blk_completion Matthew Wilcox (Oracle)
2020-10-27 18:30   ` Christoph Hellwig
2020-10-22 21:22 ` [PATCH 2/6] fs: Return error from block_read_full_page Matthew Wilcox (Oracle)
2020-10-22 21:22 ` [PATCH 3/6] fs: Convert block_read_full_page to be synchronous Matthew Wilcox (Oracle)
2020-10-22 23:35   ` Eric Biggers
2020-10-22 23:40   ` Eric Biggers
2020-10-23 13:21     ` Matthew Wilcox
2020-10-23 16:13       ` Eric Biggers
2020-10-23 20:42         ` Matthew Wilcox
2020-10-22 21:22 ` [PATCH 4/6] fs: Hoist fscrypt decryption to bio completion handler Matthew Wilcox (Oracle)
2020-10-22 21:22 ` [PATCH 5/6] fs: Turn decrypt_bh into decrypt_bio Matthew Wilcox (Oracle)
2020-10-22 21:22 ` [PATCH 6/6] fs: Convert block_read_full_page to be synchronous with fscrypt enabled Matthew Wilcox (Oracle)

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git