All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Christian Brauner <brauner@kernel.org>
Cc: "Darrick J. Wong" <djwong@kernel.org>,
	Chandan Babu R <chandan.babu@oracle.com>,
	Zhang Yi <yi.zhang@huaweicloud.com>,
	Ritesh Harjani <ritesh.list@gmail.com>,
	Jens Axboe <axboe@kernel.dk>,
	Andreas Gruenbacher <agruenba@redhat.com>,
	Damien Le Moal <dlemoal@kernel.org>,
	Naohiro Aota <naohiro.aota@wdc.com>,
	Johannes Thumshirn <jth@kernel.org>,
	linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-block@vger.kernel.org, gfs2@lists.linux.dev
Subject: [PATCH 09/14] iomap: don't chain bios
Date: Thu,  7 Dec 2023 08:27:05 +0100	[thread overview]
Message-ID: <20231207072710.176093-10-hch@lst.de> (raw)
In-Reply-To: <20231207072710.176093-1-hch@lst.de>

Back in the days when a single bio could only be filled to the hardware
limits, and we scheduled a work item for each bio completion, chaining
multiple bios for a single ioend made a lot of sense to reduce the number
of completions.  But these days bios can be filled until we reach the
number of vectors or total size limit, which means we can always fit at
least 1 megabyte worth of data in the worst case, but usually a lot more
due to large folios.  The only thing bio chaining is buying us now is
to reduce the size of the allocation from an ioend with an embedded bio
into a plain bio, which is a 52 bytes differences on 64-bit systems.

This is not worth the added complexity, so remove the bio chaining and
only use the bio embedded into the ioend.  This will help to simplify
further changes to the iomap writeback code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 90 +++++++++++-------------------------------
 fs/xfs/xfs_aops.c      |  6 +--
 include/linux/iomap.h  |  8 +++-
 3 files changed, 32 insertions(+), 72 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 78cd5c06ea9b77..7ed11eca7c7c9e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1489,40 +1489,23 @@ static u32
 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 {
 	struct inode *inode = ioend->io_inode;
-	struct bio *bio = &ioend->io_inline_bio;
-	struct bio *last = ioend->io_bio, *next;
-	u64 start = bio->bi_iter.bi_sector;
-	loff_t offset = ioend->io_offset;
-	bool quiet = bio_flagged(bio, BIO_QUIET);
+	struct bio *bio = &ioend->io_bio;
+	struct folio_iter fi;
 	u32 folio_count = 0;
 
-	for (bio = &ioend->io_inline_bio; bio; bio = next) {
-		struct folio_iter fi;
-
-		/*
-		 * For the last bio, bi_private points to the ioend, so we
-		 * need to explicitly end the iteration here.
-		 */
-		if (bio == last)
-			next = NULL;
-		else
-			next = bio->bi_private;
-
-		/* walk all folios in bio, ending page IO on them */
-		bio_for_each_folio_all(fi, bio) {
-			iomap_finish_folio_write(inode, fi.folio, fi.length,
-					error);
-			folio_count++;
-		}
-		bio_put(bio);
+	/* walk all folios in bio, ending page IO on them */
+	bio_for_each_folio_all(fi, bio) {
+		iomap_finish_folio_write(inode, fi.folio, fi.length, error);
+		folio_count++;
 	}
-	/* The ioend has been freed by bio_put() */
 
-	if (unlikely(error && !quiet)) {
+	if (unlikely(error && !bio_flagged(bio, BIO_QUIET))) {
 		printk_ratelimited(KERN_ERR
 "%s: writeback error on inode %lu, offset %lld, sector %llu",
-			inode->i_sb->s_id, inode->i_ino, offset, start);
+			inode->i_sb->s_id, inode->i_ino,
+			ioend->io_offset, ioend->io_sector);
 	}
+	bio_put(bio);	/* frees the ioend */
 	return folio_count;
 }
 
@@ -1563,7 +1546,7 @@ EXPORT_SYMBOL_GPL(iomap_finish_ioends);
 static bool
 iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
 {
-	if (ioend->io_bio->bi_status != next->io_bio->bi_status)
+	if (ioend->io_bio.bi_status != next->io_bio.bi_status)
 		return false;
 	if ((ioend->io_flags & IOMAP_F_SHARED) ^
 	    (next->io_flags & IOMAP_F_SHARED))
@@ -1628,9 +1611,8 @@ EXPORT_SYMBOL_GPL(iomap_sort_ioends);
 
 static void iomap_writepage_end_bio(struct bio *bio)
 {
-	struct iomap_ioend *ioend = bio->bi_private;
-
-	iomap_finish_ioend(ioend, blk_status_to_errno(bio->bi_status));
+	iomap_finish_ioend(iomap_ioend_from_bio(bio),
+			blk_status_to_errno(bio->bi_status));
 }
 
 /*
@@ -1645,9 +1627,6 @@ static int
 iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
 		int error)
 {
-	ioend->io_bio->bi_private = ioend;
-	ioend->io_bio->bi_end_io = iomap_writepage_end_bio;
-
 	if (wpc->ops->prepare_ioend)
 		error = wpc->ops->prepare_ioend(ioend, error);
 	if (error) {
@@ -1657,12 +1636,12 @@ iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
 		 * as there is only one reference to the ioend at this point in
 		 * time.
 		 */
-		ioend->io_bio->bi_status = errno_to_blk_status(error);
-		bio_endio(ioend->io_bio);
+		ioend->io_bio.bi_status = errno_to_blk_status(error);
+		bio_endio(&ioend->io_bio);
 		return error;
 	}
 
-	submit_bio(ioend->io_bio);
+	submit_bio(&ioend->io_bio);
 	return 0;
 }
 
@@ -1676,44 +1655,22 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
 			       REQ_OP_WRITE | wbc_to_write_flags(wbc),
 			       GFP_NOFS, &iomap_ioend_bioset);
 	bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos);
+	bio->bi_end_io = iomap_writepage_end_bio;
 	wbc_init_bio(wbc, bio);
 
-	ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
+	ioend = iomap_ioend_from_bio(bio);
 	INIT_LIST_HEAD(&ioend->io_list);
 	ioend->io_type = wpc->iomap.type;
 	ioend->io_flags = wpc->iomap.flags;
 	ioend->io_inode = inode;
 	ioend->io_size = 0;
 	ioend->io_offset = pos;
-	ioend->io_bio = bio;
 	ioend->io_sector = bio->bi_iter.bi_sector;
 
 	wpc->nr_folios = 0;
 	return ioend;
 }
 
-/*
- * Allocate a new bio, and chain the old bio to the new one.
- *
- * Note that we have to perform the chaining in this unintuitive order
- * so that the bi_private linkage is set up in the right direction for the
- * traversal in iomap_finish_ioend().
- */
-static struct bio *
-iomap_chain_bio(struct bio *prev)
-{
-	struct bio *new;
-
-	new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
-	bio_clone_blkg_association(new, prev);
-	new->bi_iter.bi_sector = bio_end_sector(prev);
-
-	bio_chain(prev, new);
-	bio_get(prev);		/* for iomap_finish_ioend */
-	submit_bio(prev);
-	return new;
-}
-
 static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
 {
 	if ((wpc->iomap.flags & IOMAP_F_SHARED) !=
@@ -1724,7 +1681,7 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
 	if (pos != wpc->ioend->io_offset + wpc->ioend->io_size)
 		return false;
 	if (iomap_sector(&wpc->iomap, pos) !=
-	    bio_end_sector(wpc->ioend->io_bio))
+	    bio_end_sector(&wpc->ioend->io_bio))
 		return false;
 	/*
 	 * Limit ioend bio chain lengths to minimise IO completion latency. This
@@ -1749,15 +1706,14 @@ static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 	size_t poff = offset_in_folio(folio, pos);
 
 	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
+new_ioend:
 		if (wpc->ioend)
 			list_add(&wpc->ioend->io_list, iolist);
 		wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
 	}
 
-	if (!bio_add_folio(wpc->ioend->io_bio, folio, len, poff)) {
-		wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio);
-		bio_add_folio_nofail(wpc->ioend->io_bio, folio, len, poff);
-	}
+	if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
+		goto new_ioend;
 
 	if (ifs)
 		atomic_add(len, &ifs->write_bytes_pending);
@@ -1988,7 +1944,7 @@ EXPORT_SYMBOL_GPL(iomap_writepages);
 static int __init iomap_init(void)
 {
 	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
-			   offsetof(struct iomap_ioend, io_inline_bio),
+			   offsetof(struct iomap_ioend, io_bio),
 			   BIOSET_NEED_BVECS);
 }
 fs_initcall(iomap_init);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 465d7630bb2185..b45ee6cbbdaab2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -112,7 +112,7 @@ xfs_end_ioend(
 	 * longer dirty. If we don't remove delalloc blocks here, they become
 	 * stale and can corrupt free space accounting on unmount.
 	 */
-	error = blk_status_to_errno(ioend->io_bio->bi_status);
+	error = blk_status_to_errno(ioend->io_bio.bi_status);
 	if (unlikely(error)) {
 		if (ioend->io_flags & IOMAP_F_SHARED) {
 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
@@ -179,7 +179,7 @@ STATIC void
 xfs_end_bio(
 	struct bio		*bio)
 {
-	struct iomap_ioend	*ioend = bio->bi_private;
+	struct iomap_ioend	*ioend = iomap_ioend_from_bio(bio);
 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	unsigned long		flags;
 
@@ -444,7 +444,7 @@ xfs_prepare_ioend(
 	/* send ioends that might require a transaction to the completion wq */
 	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
 	    (ioend->io_flags & IOMAP_F_SHARED))
-		ioend->io_bio->bi_end_io = xfs_end_bio;
+		ioend->io_bio.bi_end_io = xfs_end_bio;
 	return status;
 }
 
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index b2a05dff914d0c..b8d3b658ad2b03 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -297,10 +297,14 @@ struct iomap_ioend {
 	size_t			io_size;	/* size of the extent */
 	loff_t			io_offset;	/* offset in the file */
 	sector_t		io_sector;	/* start sector of ioend */
-	struct bio		*io_bio;	/* bio being built */
-	struct bio		io_inline_bio;	/* MUST BE LAST! */
+	struct bio		io_bio;		/* MUST BE LAST! */
 };
 
+static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio)
+{
+	return container_of(bio, struct iomap_ioend, io_bio);
+}
+
 struct iomap_writeback_ops {
 	/*
 	 * Required, maps the blocks so that writeback can be performed on
-- 
2.39.2


  parent reply	other threads:[~2023-12-07  7:27 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-07  7:26 map multiple blocks per ->map_blocks in iomap writeback Christoph Hellwig
2023-12-07  7:26 ` [PATCH 01/14] iomap: clear the per-folio dirty bits on all writeback failures Christoph Hellwig
2023-12-07  7:26 ` [PATCH 02/14] iomap: treat inline data in iomap_writepage_map as an I/O error Christoph Hellwig
2023-12-07  7:26 ` [PATCH 03/14] iomap: move the io_folios field out of struct iomap_ioend Christoph Hellwig
2023-12-07  7:27 ` [PATCH 04/14] iomap: move the PF_MEMALLOC check to iomap_writepages Christoph Hellwig
2023-12-07  7:27 ` [PATCH 05/14] iomap: factor out a iomap_writepage_handle_eof helper Christoph Hellwig
2023-12-07  7:27 ` [PATCH 06/14] iomap: move all remaining per-folio logic into iomap_writepage_map Christoph Hellwig
2023-12-07  7:27 ` [PATCH 07/14] iomap: clean up the iomap_alloc_ioend calling convention Christoph Hellwig
2023-12-07  7:27 ` [PATCH 08/14] iomap: move the iomap_sector sector calculation out of iomap_add_to_ioend Christoph Hellwig
2023-12-07  7:27 ` Christoph Hellwig [this message]
2023-12-07  7:27 ` [PATCH 10/14] iomap: only call mapping_set_error once for each failed bio Christoph Hellwig
2023-12-07  7:27 ` [PATCH 11/14] iomap: factor out a iomap_writepage_map_block helper Christoph Hellwig
2023-12-07  7:27 ` [PATCH 12/14] iomap: submit ioends immediately Christoph Hellwig
2023-12-07  7:27 ` [PATCH 13/14] iomap: map multiple blocks at a time Christoph Hellwig
2023-12-07 13:39   ` Zhang Yi
2023-12-07 15:03     ` Christoph Hellwig
2023-12-08  7:33       ` Zhang Yi
2023-12-07  7:27 ` [PATCH 14/14] iomap: pass the length of the dirty region to ->map_blocks Christoph Hellwig
2023-12-11 10:45 ` map multiple blocks per ->map_blocks in iomap writeback Christian Brauner
2024-02-01  6:07   ` Christoph Hellwig
2024-02-01  6:18     ` Darrick J. Wong
2024-02-01 13:23       ` Christian Brauner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231207072710.176093-10-hch@lst.de \
    --to=hch@lst.de \
    --cc=agruenba@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=chandan.babu@oracle.com \
    --cc=djwong@kernel.org \
    --cc=dlemoal@kernel.org \
    --cc=gfs2@lists.linux.dev \
    --cc=jth@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=naohiro.aota@wdc.com \
    --cc=ritesh.list@gmail.com \
    --cc=yi.zhang@huaweicloud.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.