* [PATCH] fs/buffer: try to submit writeback bio in unit of page
@ 2020-12-30  0:08 Ming Lei
  2021-01-04  8:44 ` Christoph Hellwig
From: Ming Lei @ 2020-12-30  0:08 UTC
  To: linux-kernel
  Cc: Ming Lei, Alexander Viro, linux-fsdevel, linux-block,
	Christoph Hellwig, Jens Axboe

It is observed that __block_write_full_page() always submits bios of
block size, which is often 512 bytes.

For sequential IO, or random/sequential writeback IO with a block size
of 4k or larger, the IO represented by all the buffer_heads in a page
can usually be completed in a single bio, and the block layer's plug
merging already combines the per-buffer_head bios into a single
request anyway.

So check whether the IO represented by the buffer_heads in a page can
be merged into a single page-sized IO; if so, submit one bio for the
whole page instead of one bio per buffer_head.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
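For illustration only, here is a minimal user-space sketch of the merge
decision described above, with a hypothetical struct standing in for
the buffer_head fields being compared (with 512-byte blocks, a 4KB
page previously generated eight bios; with the merge it generates one):

	#include <stdbool.h>
	#include <stddef.h>

	#define PAGE_SIZE 4096u

	/* Hypothetical stand-in for the buffer_head fields compared. */
	struct extent {
		unsigned long long blocknr;	/* bh->b_blocknr */
		unsigned int size;		/* bh->b_size */
		int dev;			/* bh->b_bdev */
		bool meta;			/* buffer_meta(bh) */
	};

	/*
	 * True when the blocks backing a page are physically contiguous
	 * (each extent covers exactly one block, as with buffer_heads)
	 * and together cover the whole page, i.e. when one page-sized
	 * bio can replace one bio per buffer_head.
	 */
	static bool can_submit_single_bio(const struct extent *e, size_t n)
	{
		unsigned int total = 0;
		size_t i;

		for (i = 0; i < n; i++) {
			total += e[i].size;
			if (i && !(e[i - 1].blocknr + 1 == e[i].blocknr &&
				   e[i - 1].dev == e[i].dev &&
				   e[i - 1].meta == e[i].meta))
				return false;
		}
		return total == PAGE_SIZE;
	}
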
 fs/buffer.c | 112 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 90 insertions(+), 22 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 32647d2011df..6bcf9ce5d7f8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -54,6 +54,8 @@
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 			 enum rw_hint hint, struct writeback_control *wbc);
+static int submit_page_wbc(int op, int op_flags, struct buffer_head *bh,
+			 enum rw_hint hint, struct writeback_control *wbc);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -1716,10 +1718,12 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 	int err;
 	sector_t block;
 	sector_t last_block;
-	struct buffer_head *bh, *head;
+	struct buffer_head *bh, *head, *prev_bh;
 	unsigned int blocksize, bbits;
 	int nr_underway = 0;
 	int write_flags = wbc_to_write_flags(wbc);
+	unsigned int total_size = 0;
+	bool continuous = true;
 
 	head = create_page_buffers(page, inode,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1774,6 +1778,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 		block++;
 	} while (bh != head);
 
+	prev_bh = NULL;
 	do {
 		if (!buffer_mapped(bh))
 			continue;
@@ -1792,9 +1797,17 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write_endio(bh, handler);
+			total_size += bh->b_size;
 		} else {
 			unlock_buffer(bh);
 		}
+
+		if (continuous && prev_bh && !(
+		    prev_bh->b_blocknr + 1 == bh->b_blocknr &&
+		    prev_bh->b_bdev == bh->b_bdev &&
+		    buffer_meta(prev_bh) == buffer_meta(bh)))
+			continuous = false;
+		prev_bh = bh;
 	} while ((bh = bh->b_this_page) != head);
 
 	/*
@@ -1804,15 +1817,21 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);
 
-	do {
-		struct buffer_head *next = bh->b_this_page;
-		if (buffer_async_write(bh)) {
-			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
-					inode->i_write_hint, wbc);
-			nr_underway++;
-		}
-		bh = next;
-	} while (bh != head);
+	if (total_size == PAGE_SIZE && continuous) {
+		submit_page_wbc(REQ_OP_WRITE, write_flags, bh,
+				inode->i_write_hint, wbc);
+		nr_underway = MAX_BUF_PER_PAGE;
+	} else {
+		do {
+			struct buffer_head *next = bh->b_this_page;
+			if (buffer_async_write(bh)) {
+				submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+						inode->i_write_hint, wbc);
+				nr_underway++;
+			}
+			bh = next;
+		} while (bh != head);
+	}
 	unlock_page(page);
 
 	err = 0;
@@ -3006,8 +3025,28 @@ static void end_bio_bh_io_sync(struct bio *bio)
 	bio_put(bio);
 }
 
-static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-			 enum rw_hint write_hint, struct writeback_control *wbc)
+static void end_bio_page_io_sync(struct bio *bio)
+{
+	struct buffer_head *head = bio->bi_private;
+	struct buffer_head *bh = head;
+
+	do {
+		struct buffer_head *next = bh->b_this_page;
+
+		if (unlikely(bio_flagged(bio, BIO_QUIET)))
+			set_bit(BH_Quiet, &bh->b_state);
+
+		bh->b_end_io(bh, !bio->bi_status);
+		bh = next;
+	} while (bh != head);
+
+	bio_put(bio);
+}
+
+static int __submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
+			   enum rw_hint write_hint,
+			   struct writeback_control *wbc, unsigned int size,
+			   bio_end_io_t   *end_io_handler)
 {
 	struct bio *bio;
 
@@ -3017,12 +3056,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	BUG_ON(buffer_delay(bh));
 	BUG_ON(buffer_unwritten(bh));
 
-	/*
-	 * Only clear out a write error when rewriting
-	 */
-	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
-		clear_buffer_write_io_error(bh);
-
 	bio = bio_alloc(GFP_NOIO, 1);
 
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
@@ -3031,10 +3064,10 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_write_hint = write_hint;
 
-	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-	BUG_ON(bio->bi_iter.bi_size != bh->b_size);
+	bio_add_page(bio, bh->b_page, size, bh_offset(bh));
+	BUG_ON(bio->bi_iter.bi_size != size);
 
-	bio->bi_end_io = end_bio_bh_io_sync;
+	bio->bi_end_io = end_io_handler;
 	bio->bi_private = bh;
 
 	if (buffer_meta(bh))
@@ -3048,13 +3081,48 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
 	if (wbc) {
 		wbc_init_bio(wbc, bio);
-		wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
+		wbc_account_cgroup_owner(wbc, bh->b_page, size);
 	}
 
 	submit_bio(bio);
 	return 0;
 }
 
+static inline int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
+				enum rw_hint write_hint,
+				struct writeback_control *wbc)
+{
+	/*
+	 * Only clear out a write error when rewriting
+	 */
+	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
+		clear_buffer_write_io_error(bh);
+
+	return __submit_bh_wbc(op, op_flags, bh, write_hint, wbc, bh->b_size,
+			       end_bio_bh_io_sync);
+}
+
+static int submit_page_wbc(int op, int op_flags, struct buffer_head *head,
+			   enum rw_hint write_hint,
+			   struct writeback_control *wbc)
+{
+	struct buffer_head *bh = head;
+
+	WARN_ON(bh_offset(head) != 0);
+
+	/*
+	 * Only clear out a write error when rewriting
+	 */
+	do {
+		if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
+			clear_buffer_write_io_error(bh);
+		bh = bh->b_this_page;
+	} while (bh != head);
+
+	return __submit_bh_wbc(op, op_flags, head, write_hint, wbc, PAGE_SIZE,
+			       end_bio_page_io_sync);
+}
+
 int submit_bh(int op, int op_flags, struct buffer_head *bh)
 {
 	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
-- 
2.28.0



* Re: [PATCH] fs/buffer: try to submit writeback bio in unit of page
  2020-12-30  0:08 [PATCH] fs/buffer: try to submit writeback bio in unit of page Ming Lei
@ 2021-01-04  8:44 ` Christoph Hellwig
  2021-01-04  9:24   ` Ming Lei
From: Christoph Hellwig @ 2021-01-04  8:44 UTC
  To: Ming Lei
  Cc: linux-kernel, Alexander Viro, linux-fsdevel, linux-block,
	Christoph Hellwig, Jens Axboe

On Wed, Dec 30, 2020 at 08:08:15AM +0800, Ming Lei wrote:
> It is observed that __block_write_full_page() always submits bios of
> block size, which is often 512 bytes.
> 
> For sequential IO, or random/sequential writeback IO with a block size
> of 4k or larger, the IO represented by all the buffer_heads in a page
> can usually be completed in a single bio, and the block layer's plug
> merging already combines the per-buffer_head bios into a single
> request anyway.
> 
> So check whether the IO represented by the buffer_heads in a page can
> be merged into a single page-sized IO; if so, submit one bio for the
> whole page instead of one bio per buffer_head.

There is some very weird formatting in here.  From a very quick look
the changes look sensible, but I wonder if we should spend so much
time optimizing the legacy buffer_head I/O path, rather than switching
callers to saner helpers.


* Re: [PATCH] fs/buffer: try to submit writeback bio in unit of page
  2021-01-04  8:44 ` Christoph Hellwig
@ 2021-01-04  9:24   ` Ming Lei
From: Ming Lei @ 2021-01-04  9:24 UTC
  To: Christoph Hellwig
  Cc: linux-kernel, Alexander Viro, linux-fsdevel, linux-block, Jens Axboe

On Mon, Jan 04, 2021 at 09:44:15AM +0100, Christoph Hellwig wrote:
> On Wed, Dec 30, 2020 at 08:08:15AM +0800, Ming Lei wrote:
> > It is observed that __block_write_full_page() always submits bios of
> > block size, which is often 512 bytes.
> > 
> > For sequential IO, or random/sequential writeback IO with a block size
> > of 4k or larger, the IO represented by all the buffer_heads in a page
> > can usually be completed in a single bio, and the block layer's plug
> > merging already combines the per-buffer_head bios into a single
> > request anyway.
> > 
> > So check whether the IO represented by the buffer_heads in a page can
> > be merged into a single page-sized IO; if so, submit one bio for the
> > whole page instead of one bio per buffer_head.
> 
> There is some very weird formatting in here.  From a very quick look
> the changes look sensible, but I wonder if we should spend so much
> time optimizing the legacy buffer_head I/O path, rather than switching
> callers to saner helpers.

It may take a long time to convert fs code over to iomap, and my
understanding is that fs/block_dev.c can't be converted to iomap until
every filesystem stops using buffer_head; correct me if that is wrong.


Thanks,
Ming

