All of lore.kernel.org
 help / color / mirror / Atom feed
* reduce memory allocation in the btrfs direct I/O path
@ 2022-05-04 16:23 Christoph Hellwig
  2022-05-04 16:23 ` [PATCH 1/5] iomap: allow the file system to provide a bio_set for direct I/O Christoph Hellwig
                   ` (6 more replies)
  0 siblings, 7 replies; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-04 16:23 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel

Hi all,

this series adds two minor improvements to iomap that allow btrfs
to avoid a memory allocation per read/write system call and another
one per submitted bio.  I also have at last two other pending uses
for the iomap functionality later on, so they are not really btrfs
specific either.

Diffstat:
 fs/btrfs/btrfs_inode.h |   25 --------
 fs/btrfs/ctree.h       |    6 -
 fs/btrfs/file.c        |    6 -
 fs/btrfs/inode.c       |  152 +++++++++++++++++++++++--------------------------
 fs/iomap/direct-io.c   |   26 +++++++-
 include/linux/iomap.h  |    4 +
 6 files changed, 104 insertions(+), 115 deletions(-)

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/5] iomap: allow the file system to provide a bio_set for direct I/O
  2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
@ 2022-05-04 16:23 ` Christoph Hellwig
  2022-05-05 15:38   ` Darrick J. Wong
  2022-05-04 16:23 ` [PATCH 2/5] iomap: add per-iomap_iter private data Christoph Hellwig
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-04 16:23 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel

Allow the file system to provide a specific bio_set for allocating
direct I/O bios.  This will allow file systems that use the
->submit_io hook to stash away additional information for file system
use.

To make use of this additional space for information in the completion
path, the file system needs to override the ->bi_end_io callback and
then call back into iomap, so export iomap_dio_bio_end_io for that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/direct-io.c  | 18 ++++++++++++++----
 include/linux/iomap.h |  3 +++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b08f5dc31780d..15929690d89e3 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -51,6 +51,15 @@ struct iomap_dio {
 	};
 };
 
+static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
+		struct iomap_dio *dio, unsigned short nr_vecs, unsigned int opf)
+{
+	if (dio->dops && dio->dops->bio_set)
+		return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf,
+					GFP_KERNEL, dio->dops->bio_set);
+	return bio_alloc(iter->iomap.bdev, nr_vecs, opf, GFP_KERNEL);
+}
+
 static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 		struct iomap_dio *dio, struct bio *bio, loff_t pos)
 {
@@ -144,7 +153,7 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
 	cmpxchg(&dio->error, 0, ret);
 }
 
-static void iomap_dio_bio_end_io(struct bio *bio)
+void iomap_dio_bio_end_io(struct bio *bio)
 {
 	struct iomap_dio *dio = bio->bi_private;
 	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
@@ -176,16 +185,17 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 		bio_put(bio);
 	}
 }
+EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
 
 static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 		loff_t pos, unsigned len)
 {
 	struct inode *inode = file_inode(dio->iocb->ki_filp);
 	struct page *page = ZERO_PAGE(0);
-	int flags = REQ_SYNC | REQ_IDLE;
 	struct bio *bio;
 
-	bio = bio_alloc(iter->iomap.bdev, 1, REQ_OP_WRITE | flags, GFP_KERNEL);
+	bio = iomap_dio_alloc_bio(iter, dio, 1,
+			REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
 	fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
 				  GFP_KERNEL);
 	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
@@ -311,7 +321,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 			goto out;
 		}
 
-		bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL);
+		bio = iomap_dio_alloc_bio(iter, dio, nr_pages, bio_opf);
 		fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
 					  GFP_KERNEL);
 		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index b76f0dd149fb4..a5483020dad41 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -320,6 +320,8 @@ struct iomap_dio_ops {
 		      unsigned flags);
 	void (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
 		          loff_t file_offset);
+
+	struct bio_set *bio_set;
 };
 
 /*
@@ -349,6 +351,7 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		unsigned int dio_flags, size_t done_before);
 ssize_t iomap_dio_complete(struct iomap_dio *dio);
+void iomap_dio_bio_end_io(struct bio *bio);
 
 #ifdef CONFIG_SWAP
 struct file;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
  2022-05-04 16:23 ` [PATCH 1/5] iomap: allow the file system to provide a bio_set for direct I/O Christoph Hellwig
@ 2022-05-04 16:23 ` Christoph Hellwig
  2022-05-05  8:06   ` Nikolay Borisov
  2022-05-05 15:41   ` Darrick J. Wong
  2022-05-04 16:23 ` [PATCH 3/5] btrfs: add a btrfs_dio_rw wrapper Christoph Hellwig
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-04 16:23 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel

Allow the file system to keep state for all iterations.  For now only
wire it up for direct I/O as there is an immediate need for it there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/direct-io.c  | 8 ++++++++
 include/linux/iomap.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 15929690d89e3..355abe2eacc6a 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -520,6 +520,14 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->submit.waiter = current;
 	dio->submit.poll_bio = NULL;
 
+	/*
+	 * Transfer the private data that was passed by the caller to the
+	 * iomap_iter, and clear it in the iocb, as iocb->private will be
+	 * used for polled bio completion later.
+	 */
+	iomi.private = iocb->private;
+	WRITE_ONCE(iocb->private, NULL);
+
 	if (iov_iter_rw(iter) == READ) {
 		if (iomi.pos >= dio->i_size)
 			goto out_free_dio;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a5483020dad41..109c055865f73 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -188,6 +188,7 @@ struct iomap_iter {
 	unsigned flags;
 	struct iomap iomap;
 	struct iomap srcmap;
+	void *private;
 };
 
 int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/5] btrfs: add a btrfs_dio_rw wrapper
  2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
  2022-05-04 16:23 ` [PATCH 1/5] iomap: allow the file system to provide a bio_set for direct I/O Christoph Hellwig
  2022-05-04 16:23 ` [PATCH 2/5] iomap: add per-iomap_iter private data Christoph Hellwig
@ 2022-05-04 16:23 ` Christoph Hellwig
  2022-05-04 16:23 ` [PATCH 4/5] btrfs: allocate dio_data on stack Christoph Hellwig
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-04 16:23 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel

Add a wrapper around iomap_dio_rw that keeps the direct I/O internals
isolated in inode.c.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h |  5 +++--
 fs/btrfs/file.c  |  6 ++----
 fs/btrfs/inode.c | 11 +++++++++--
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6e939bf01dcc3..aa6e71fdc72b9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3358,9 +3358,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
 ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
 			     const struct btrfs_ioctl_encoded_io_args *encoded);
 
+ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		     size_t done_before);
+
 extern const struct dentry_operations btrfs_dentry_operations;
-extern const struct iomap_ops btrfs_dio_iomap_ops;
-extern const struct iomap_dio_ops btrfs_dio_ops;
 
 /* Inode locking type flags, by default the exclusive lock is taken */
 #define BTRFS_ILOCK_SHARED	(1U << 0)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b64fb93d90469..46c2baa8fdf54 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1929,8 +1929,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	 */
 again:
 	from->nofault = true;
-	err = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
-			   IOMAP_DIO_PARTIAL, written);
+	err = btrfs_dio_rw(iocb, from, written);
 	from->nofault = false;
 
 	/* No increment (+=) because iomap returns a cumulative value. */
@@ -3693,8 +3692,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
 	 */
 	pagefault_disable();
 	to->nofault = true;
-	ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
-			   IOMAP_DIO_PARTIAL, read);
+	ret = btrfs_dio_rw(iocb, to, read);
 	to->nofault = false;
 	pagefault_enable();
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b42d6e7e4049f..cdf96a2472821 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8155,15 +8155,22 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 	btrfs_dio_private_put(dip);
 }
 
-const struct iomap_ops btrfs_dio_iomap_ops = {
+static const struct iomap_ops btrfs_dio_iomap_ops = {
 	.iomap_begin            = btrfs_dio_iomap_begin,
 	.iomap_end              = btrfs_dio_iomap_end,
 };
 
-const struct iomap_dio_ops btrfs_dio_ops = {
+static const struct iomap_dio_ops btrfs_dio_ops = {
 	.submit_io		= btrfs_submit_direct,
 };
 
+ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		size_t done_before)
+{
+	return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+			   IOMAP_DIO_PARTIAL, done_before);
+}
+
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			u64 start, u64 len)
 {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 4/5] btrfs: allocate dio_data on stack
  2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
                   ` (2 preceding siblings ...)
  2022-05-04 16:23 ` [PATCH 3/5] btrfs: add a btrfs_dio_rw wrapper Christoph Hellwig
@ 2022-05-04 16:23 ` Christoph Hellwig
  2022-05-04 16:23 ` [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio Christoph Hellwig
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-04 16:23 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel

Make use of the new iomap_iter->private field to avoid a memory
allocation per iomap range.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/inode.c | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cdf96a2472821..6ecff32c1fc22 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7546,10 +7546,11 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 		loff_t length, unsigned int flags, struct iomap *iomap,
 		struct iomap *srcmap)
 {
+	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct extent_map *em;
 	struct extent_state *cached_state = NULL;
-	struct btrfs_dio_data *dio_data = NULL;
+	struct btrfs_dio_data *dio_data = iter->private;
 	u64 lockstart, lockend;
 	const bool write = !!(flags & IOMAP_WRITE);
 	int ret = 0;
@@ -7595,17 +7596,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 		}
 	}
 
-	if (flags & IOMAP_NOWAIT) {
-		dio_data = kzalloc(sizeof(*dio_data), GFP_NOWAIT);
-		if (!dio_data)
-			return -EAGAIN;
-	} else {
-		dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS);
-		if (!dio_data)
-			return -ENOMEM;
-	}
-
-	iomap->private = dio_data;
+	memset(dio_data, 0, sizeof(*dio_data));
 
 	/*
 	 * We always try to allocate data space and must do it before locking
@@ -7769,23 +7760,22 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 		extent_changeset_free(dio_data->data_reserved);
 	}
 
-	kfree(dio_data);
-
 	return ret;
 }
 
 static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 		ssize_t written, unsigned int flags, struct iomap *iomap)
 {
-	int ret = 0;
-	struct btrfs_dio_data *dio_data = iomap->private;
+	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
+	struct btrfs_dio_data *dio_data = iter->private;
 	size_t submitted = dio_data->submitted;
 	const bool write = !!(flags & IOMAP_WRITE);
+	int ret = 0;
 
 	if (!write && (iomap->type == IOMAP_HOLE)) {
 		/* If reading from a hole, unlock and return */
 		unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1);
-		goto out;
+		return 0;
 	}
 
 	if (submitted < length) {
@@ -7802,10 +7792,6 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 
 	if (write)
 		extent_changeset_free(dio_data->data_reserved);
-out:
-	kfree(dio_data);
-	iomap->private = NULL;
-
 	return ret;
 }
 
@@ -8041,7 +8027,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 	int ret;
 	blk_status_t status;
 	struct btrfs_io_geometry geom;
-	struct btrfs_dio_data *dio_data = iter->iomap.private;
+	struct btrfs_dio_data *dio_data = iter->private;
 	struct extent_map *em = NULL;
 
 	dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
@@ -8167,6 +8153,9 @@ static const struct iomap_dio_ops btrfs_dio_ops = {
 ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		size_t done_before)
 {
+	struct btrfs_dio_data data;
+
+	iocb->private = &data;
 	return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
 			   IOMAP_DIO_PARTIAL, done_before);
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio
  2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
                   ` (3 preceding siblings ...)
  2022-05-04 16:23 ` [PATCH 4/5] btrfs: allocate dio_data on stack Christoph Hellwig
@ 2022-05-04 16:23 ` Christoph Hellwig
  2022-05-05  8:12   ` Nikolay Borisov
  2022-05-05 15:52   ` David Sterba
  2022-05-05  8:33 ` reduce memory allocation in the btrfs direct I/O path Nikolay Borisov
  2022-05-05 15:55 ` David Sterba
  6 siblings, 2 replies; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-04 16:23 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel

Create a new bio_set that contains all the per-bio private data needed
by btrfs for direct I/O and tell the iomap code to use that instead
of separately allocation the btrfs_dio_private structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/btrfs_inode.h |  25 ----------
 fs/btrfs/ctree.h       |   1 -
 fs/btrfs/inode.c       | 108 ++++++++++++++++++++---------------------
 3 files changed, 53 insertions(+), 81 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 32131a5d321b3..33811e896623f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -395,31 +395,6 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
 	return true;
 }
 
-struct btrfs_dio_private {
-	struct inode *inode;
-
-	/*
-	 * Since DIO can use anonymous page, we cannot use page_offset() to
-	 * grab the file offset, thus need a dedicated member for file offset.
-	 */
-	u64 file_offset;
-	u64 disk_bytenr;
-	/* Used for bio::bi_size */
-	u32 bytes;
-
-	/*
-	 * References to this structure. There is one reference per in-flight
-	 * bio plus one while we're still setting up.
-	 */
-	refcount_t refs;
-
-	/* dio_bio came from fs/direct-io.c */
-	struct bio *dio_bio;
-
-	/* Array of checksums */
-	u8 csums[];
-};
-
 /*
  * btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
  * separate u32s. These two functions convert between the two representations.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index aa6e71fdc72b9..fa64323c453f5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3217,7 +3217,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
 int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
 
 /* file-item.c */
-struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, u64 bytenr, u64 len);
 blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6ecff32c1fc22..f0c74bd1e6c19 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -68,6 +68,31 @@ struct btrfs_dio_data {
 	bool nocow_done;
 };
 
+struct btrfs_dio_private {
+	struct inode *inode;
+
+	/*
+	 * Since DIO can use anonymous page, we cannot use page_offset() to
+	 * grab the file offset, thus need a dedicated member for file offset.
+	 */
+	u64 file_offset;
+	/* Used for bio::bi_size */
+	u32 bytes;
+
+	/*
+	 * References to this structure. There is one reference per in-flight
+	 * bio plus one while we're still setting up.
+	 */
+	refcount_t refs;
+
+	/* Array of checksums */
+	u8 *csums;
+
+	struct bio bio;
+};
+
+static struct bio_set btrfs_dio_bioset;
+
 struct btrfs_rename_ctx {
 	/* Output field. Stores the index number of the old directory entry. */
 	u64 index;
@@ -7804,19 +7829,19 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
 	if (!refcount_dec_and_test(&dip->refs))
 		return;
 
-	if (btrfs_op(dip->dio_bio) == BTRFS_MAP_WRITE) {
+	if (btrfs_op(&dip->bio) == BTRFS_MAP_WRITE) {
 		__endio_write_update_ordered(BTRFS_I(dip->inode),
 					     dip->file_offset,
 					     dip->bytes,
-					     !dip->dio_bio->bi_status);
+					     !dip->bio.bi_status);
 	} else {
 		unlock_extent(&BTRFS_I(dip->inode)->io_tree,
 			      dip->file_offset,
 			      dip->file_offset + dip->bytes - 1);
 	}
 
-	bio_endio(dip->dio_bio);
-	kfree(dip);
+	kfree(dip->csums);
+	bio_endio(&dip->bio);
 }
 
 static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
@@ -7918,7 +7943,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
 		err = btrfs_check_read_dio_bio(dip, bbio, !err);
 
 	if (err)
-		dip->dio_bio->bi_status = err;
+		dip->bio.bi_status = err;
 
 	btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
 
@@ -7973,50 +7998,16 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 	return ret;
 }
 
-/*
- * If this succeeds, the btrfs_dio_private is responsible for cleaning up locked
- * or ordered extents whether or not we submit any bios.
- */
-static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
-							  struct inode *inode,
-							  loff_t file_offset)
-{
-	const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
-	const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
-	size_t dip_size;
-	struct btrfs_dio_private *dip;
-
-	dip_size = sizeof(*dip);
-	if (!write && csum) {
-		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-		size_t nblocks;
-
-		nblocks = dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits;
-		dip_size += fs_info->csum_size * nblocks;
-	}
-
-	dip = kzalloc(dip_size, GFP_NOFS);
-	if (!dip)
-		return NULL;
-
-	dip->inode = inode;
-	dip->file_offset = file_offset;
-	dip->bytes = dio_bio->bi_iter.bi_size;
-	dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9;
-	dip->dio_bio = dio_bio;
-	refcount_set(&dip->refs, 1);
-	return dip;
-}
-
 static void btrfs_submit_direct(const struct iomap_iter *iter,
 		struct bio *dio_bio, loff_t file_offset)
 {
+	struct btrfs_dio_private *dip =
+		container_of(dio_bio, struct btrfs_dio_private, bio);
 	struct inode *inode = iter->inode;
 	const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
 			     BTRFS_BLOCK_GROUP_RAID56_MASK);
-	struct btrfs_dio_private *dip;
 	struct bio *bio;
 	u64 start_sector;
 	int async_submit = 0;
@@ -8030,24 +8021,24 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 	struct btrfs_dio_data *dio_data = iter->private;
 	struct extent_map *em = NULL;
 
-	dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
-	if (!dip) {
-		if (!write) {
-			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
-				file_offset + dio_bio->bi_iter.bi_size - 1);
-		}
-		dio_bio->bi_status = BLK_STS_RESOURCE;
-		bio_endio(dio_bio);
-		return;
-	}
+	dip->inode = inode;
+	dip->file_offset = file_offset;
+	dip->bytes = dio_bio->bi_iter.bi_size;
+	refcount_set(&dip->refs, 1);
+	dip->csums = NULL;
 
-	if (!write) {
+	if (!write && !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 		/*
 		 * Load the csums up front to reduce csum tree searches and
 		 * contention when submitting bios.
-		 *
-		 * If we have csums disabled this will do nothing.
 		 */
+		status = BLK_STS_RESOURCE;
+		dip->csums = kzalloc(fs_info->csum_size *
+			(dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits),
+			GFP_NOFS);
+		if (!dip)
+			goto out_err;
+
 		status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
 		if (status != BLK_STS_OK)
 			goto out_err;
@@ -8137,7 +8128,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 out_err_em:
 	free_extent_map(em);
 out_err:
-	dip->dio_bio->bi_status = status;
+	dio_bio->bi_status = status;
 	btrfs_dio_private_put(dip);
 }
 
@@ -8148,6 +8139,7 @@ static const struct iomap_ops btrfs_dio_iomap_ops = {
 
 static const struct iomap_dio_ops btrfs_dio_ops = {
 	.submit_io		= btrfs_submit_direct,
+	.bio_set		= &btrfs_dio_bioset,
 };
 
 ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
@@ -8970,6 +8962,7 @@ void __cold btrfs_destroy_cachep(void)
 	 * destroy cache.
 	 */
 	rcu_barrier();
+	bioset_exit(&btrfs_dio_bioset);
 	kmem_cache_destroy(btrfs_inode_cachep);
 	kmem_cache_destroy(btrfs_trans_handle_cachep);
 	kmem_cache_destroy(btrfs_path_cachep);
@@ -9010,6 +9003,11 @@ int __init btrfs_init_cachep(void)
 	if (!btrfs_free_space_bitmap_cachep)
 		goto fail;
 
+	if (bioset_init(&btrfs_dio_bioset, BIO_POOL_SIZE,
+			offsetof(struct btrfs_dio_private, bio),
+			BIOSET_NEED_BVECS))
+		goto fail;
+
 	return 0;
 fail:
 	btrfs_destroy_cachep();
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-04 16:23 ` [PATCH 2/5] iomap: add per-iomap_iter private data Christoph Hellwig
@ 2022-05-05  8:06   ` Nikolay Borisov
  2022-05-05 15:06     ` Christoph Hellwig
  2022-05-05 15:41   ` Darrick J. Wong
  1 sibling, 1 reply; 24+ messages in thread
From: Nikolay Borisov @ 2022-05-05  8:06 UTC (permalink / raw)
  To: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba,
	Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel



On 4.05.22 г. 19:23 ч., Christoph Hellwig wrote:
> Allow the file system to keep state for all iterations.  For now only
> wire it up for direct I/O as there is an immediate need for it there.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/iomap/direct-io.c  | 8 ++++++++
>   include/linux/iomap.h | 1 +
>   2 files changed, 9 insertions(+)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 15929690d89e3..355abe2eacc6a 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -520,6 +520,14 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>   	dio->submit.waiter = current;
>   	dio->submit.poll_bio = NULL;
>   
> +	/*
> +	 * Transfer the private data that was passed by the caller to the
> +	 * iomap_iter, and clear it in the iocb, as iocb->private will be
> +	 * used for polled bio completion later.
> +	 */
> +	iomi.private = iocb->private;
> +	WRITE_ONCE(iocb->private, NULL);

nit: Why use WRITE_ONCE here? Generaly when it's used it will suggest to 
the reader something funny is going on with accessing that variable 
without holding a particular lock?

> +
>   	if (iov_iter_rw(iter) == READ) {
>   		if (iomi.pos >= dio->i_size)
>   			goto out_free_dio;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index a5483020dad41..109c055865f73 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -188,6 +188,7 @@ struct iomap_iter {
>   	unsigned flags;
>   	struct iomap iomap;
>   	struct iomap srcmap;
> +	void *private;
>   };
>   
>   int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio
  2022-05-04 16:23 ` [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio Christoph Hellwig
@ 2022-05-05  8:12   ` Nikolay Borisov
  2022-05-05 15:07     ` Christoph Hellwig
  2022-05-05 15:52   ` David Sterba
  1 sibling, 1 reply; 24+ messages in thread
From: Nikolay Borisov @ 2022-05-05  8:12 UTC (permalink / raw)
  To: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba,
	Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel



On 4.05.22 г. 19:23 ч., Christoph Hellwig wrote:
> Create a new bio_set that contains all the per-bio private data needed
> by btrfs for direct I/O and tell the iomap code to use that instead
> of separately allocation the btrfs_dio_private structure.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/btrfs_inode.h |  25 ----------
>   fs/btrfs/ctree.h       |   1 -
>   fs/btrfs/inode.c       | 108 ++++++++++++++++++++---------------------
>   3 files changed, 53 insertions(+), 81 deletions(-)
> 
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index 32131a5d321b3..33811e896623f 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -395,31 +395,6 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
>   	return true;
>   }
>   
> -struct btrfs_dio_private {
> -	struct inode *inode;
> -
> -	/*
> -	 * Since DIO can use anonymous page, we cannot use page_offset() to
> -	 * grab the file offset, thus need a dedicated member for file offset.
> -	 */
> -	u64 file_offset;
> -	u64 disk_bytenr;

nit: You are actually removing this member when copying the struct, 
that's an independent change (albeit I'd say insignificant). Generally 
we prefer such changes to be in separate patches with rationale when the 
given member became redundant.

<snip>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: reduce memory allocation in the btrfs direct I/O path
  2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
                   ` (4 preceding siblings ...)
  2022-05-04 16:23 ` [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio Christoph Hellwig
@ 2022-05-05  8:33 ` Nikolay Borisov
  2022-05-05 15:55 ` David Sterba
  6 siblings, 0 replies; 24+ messages in thread
From: Nikolay Borisov @ 2022-05-05  8:33 UTC (permalink / raw)
  To: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba,
	Darrick J. Wong
  Cc: linux-btrfs, linux-xfs, linux-fsdevel



On 4.05.22 г. 19:23 ч., Christoph Hellwig wrote:
> Hi all,
> 
> this series adds two minor improvements to iomap that allow btrfs
> to avoid a memory allocation per read/write system call and another
> one per submitted bio.  I also have at last two other pending uses
> for the iomap functionality later on, so they are not really btrfs
> specific either.
> 
> Diffstat:
>   fs/btrfs/btrfs_inode.h |   25 --------
>   fs/btrfs/ctree.h       |    6 -
>   fs/btrfs/file.c        |    6 -
>   fs/btrfs/inode.c       |  152 +++++++++++++++++++++++--------------------------
>   fs/iomap/direct-io.c   |   26 +++++++-
>   include/linux/iomap.h  |    4 +
>   6 files changed, 104 insertions(+), 115 deletions(-)
> 


Apart from the silent disk_bytenr removal nit:

Reviewed-by: Nikolay Borisov <nborisov@suse.com>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-05  8:06   ` Nikolay Borisov
@ 2022-05-05 15:06     ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-05 15:06 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba,
	Darrick J. Wong, linux-btrfs, linux-xfs, linux-fsdevel

On Thu, May 05, 2022 at 11:06:50AM +0300, Nikolay Borisov wrote:
>> @@ -520,6 +520,14 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>>   	dio->submit.waiter = current;
>>   	dio->submit.poll_bio = NULL;
>>   +	/*
>> +	 * Transfer the private data that was passed by the caller to the
>> +	 * iomap_iter, and clear it in the iocb, as iocb->private will be
>> +	 * used for polled bio completion later.
>> +	 */
>> +	iomi.private = iocb->private;
>> +	WRITE_ONCE(iocb->private, NULL);
>
> nit: Why use WRITE_ONCE here? Generaly when it's used it will suggest to 
> the reader something funny is going on with accessing that variable without 
> holding a particular lock?

Because we use WRITE_ONCE on iocb->private later on when we use it to
store the bio that is polled for, and we really want the store that
clears it to NULL to be done before we start dealing with bio submission.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio
  2022-05-05  8:12   ` Nikolay Borisov
@ 2022-05-05 15:07     ` Christoph Hellwig
  2022-05-05 15:20       ` David Sterba
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-05 15:07 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba,
	Darrick J. Wong, linux-btrfs, linux-xfs, linux-fsdevel

On Thu, May 05, 2022 at 11:12:45AM +0300, Nikolay Borisov wrote:
> nit: You are actually removing this member when copying the struct, that's 
> an independent change (albeit I'd say insignificant). Generally we prefer 
> such changes to be in separate patches with rationale when the given member 
> became redundant.

This one actually was entirely unused, but yes, this could have been
split into another patch.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio
  2022-05-05 15:07     ` Christoph Hellwig
@ 2022-05-05 15:20       ` David Sterba
  0 siblings, 0 replies; 24+ messages in thread
From: David Sterba @ 2022-05-05 15:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Nikolay Borisov, Chris Mason, Josef Bacik, David Sterba,
	Darrick J. Wong, linux-btrfs, linux-xfs, linux-fsdevel

On Thu, May 05, 2022 at 05:07:17PM +0200, Christoph Hellwig wrote:
> On Thu, May 05, 2022 at 11:12:45AM +0300, Nikolay Borisov wrote:
> > nit: You are actually removing this member when copying the struct, that's 
> > an independent change (albeit I'd say insignificant). Generally we prefer 
> > such changes to be in separate patches with rationale when the given member 
> > became redundant.
> 
> This one actually was entirely unused, but yes, this could have been
> split into another patch.

Cleanest would be to do it in a separate patch, but as it's trivial
mentioning it in the changelog would be also sufficient. When code is
moved either within file or to another one it's better to keep it 1:1
besides some trivial renames or formatting. It's easy to overlook a line
missing. For now no need to resend, I'll add a notice to the changelog.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/5] iomap: allow the file system to provide a bio_set for direct I/O
  2022-05-04 16:23 ` [PATCH 1/5] iomap: allow the file system to provide a bio_set for direct I/O Christoph Hellwig
@ 2022-05-05 15:38   ` Darrick J. Wong
  0 siblings, 0 replies; 24+ messages in thread
From: Darrick J. Wong @ 2022-05-05 15:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs, linux-xfs,
	linux-fsdevel

On Wed, May 04, 2022 at 09:23:38AM -0700, Christoph Hellwig wrote:
> Allow the file system to provide a specific bio_set for allocating
> direct I/O bios.  This will allow file systems that use the
> ->submit_io hook to stash away additional information for file system
> use.
> 
> To make use of this additional space for information in the completion
> path, the file system needs to override the ->bi_end_io callback and
> then call back into iomap, so export iomap_dio_bio_end_io for that.

I hear reports of people (Ted) at LSF asking for better support in
porting things to iomap.  Can we document the entire process of using
additional space per bio somewhere in the header file?

/*
 * Filesystems wishing to attach private information to a directio bio
 * must provide a ->submit_io method that attaches the additional
 * information to the bio and changes the ->bi_end_io callback to a
 * custom function.  This function should, at a minimum, perform any
 * relevant post-processing of the bio and end with a call to
 * iomap_dio_bio_end_io.
 */

I'm not sure where this would go in iomap.h, since this is more about
custom bios than it is about biosets, but I think we could at least try
to seed future converter patchset authors with some reasonable recipes
so we don't end up with the sprawling messes that we have with
bufferheads.  Obviously, adding recipes is not something in scope for
this patch.

This patch looks ok, so
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D


> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/iomap/direct-io.c  | 18 ++++++++++++++----
>  include/linux/iomap.h |  3 +++
>  2 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index b08f5dc31780d..15929690d89e3 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -51,6 +51,15 @@ struct iomap_dio {
>  	};
>  };
>  
> +static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
> +		struct iomap_dio *dio, unsigned short nr_vecs, unsigned int opf)
> +{
> +	if (dio->dops && dio->dops->bio_set)
> +		return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf,
> +					GFP_KERNEL, dio->dops->bio_set);
> +	return bio_alloc(iter->iomap.bdev, nr_vecs, opf, GFP_KERNEL);
> +}
> +
>  static void iomap_dio_submit_bio(const struct iomap_iter *iter,
>  		struct iomap_dio *dio, struct bio *bio, loff_t pos)
>  {
> @@ -144,7 +153,7 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
>  	cmpxchg(&dio->error, 0, ret);
>  }
>  
> -static void iomap_dio_bio_end_io(struct bio *bio)
> +void iomap_dio_bio_end_io(struct bio *bio)
>  {
>  	struct iomap_dio *dio = bio->bi_private;
>  	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
> @@ -176,16 +185,17 @@ static void iomap_dio_bio_end_io(struct bio *bio)
>  		bio_put(bio);
>  	}
>  }
> +EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
>  
>  static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
>  		loff_t pos, unsigned len)
>  {
>  	struct inode *inode = file_inode(dio->iocb->ki_filp);
>  	struct page *page = ZERO_PAGE(0);
> -	int flags = REQ_SYNC | REQ_IDLE;
>  	struct bio *bio;
>  
> -	bio = bio_alloc(iter->iomap.bdev, 1, REQ_OP_WRITE | flags, GFP_KERNEL);
> +	bio = iomap_dio_alloc_bio(iter, dio, 1,
> +			REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
>  	fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
>  				  GFP_KERNEL);
>  	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
> @@ -311,7 +321,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  			goto out;
>  		}
>  
> -		bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL);
> +		bio = iomap_dio_alloc_bio(iter, dio, nr_pages, bio_opf);
>  		fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
>  					  GFP_KERNEL);
>  		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index b76f0dd149fb4..a5483020dad41 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -320,6 +320,8 @@ struct iomap_dio_ops {
>  		      unsigned flags);
>  	void (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
>  		          loff_t file_offset);
> +
> +	struct bio_set *bio_set;
>  };
>  
>  /*
> @@ -349,6 +351,7 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
>  		unsigned int dio_flags, size_t done_before);
>  ssize_t iomap_dio_complete(struct iomap_dio *dio);
> +void iomap_dio_bio_end_io(struct bio *bio);
>  
>  #ifdef CONFIG_SWAP
>  struct file;
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-04 16:23 ` [PATCH 2/5] iomap: add per-iomap_iter private data Christoph Hellwig
  2022-05-05  8:06   ` Nikolay Borisov
@ 2022-05-05 15:41   ` Darrick J. Wong
  2022-05-05 15:45     ` Christoph Hellwig
  1 sibling, 1 reply; 24+ messages in thread
From: Darrick J. Wong @ 2022-05-05 15:41 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs, linux-xfs,
	linux-fsdevel

On Wed, May 04, 2022 at 09:23:39AM -0700, Christoph Hellwig wrote:
> Allow the file system to keep state for all iterations.  For now only
> wire it up for direct I/O as there is an immediate need for it there.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/iomap/direct-io.c  | 8 ++++++++
>  include/linux/iomap.h | 1 +
>  2 files changed, 9 insertions(+)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 15929690d89e3..355abe2eacc6a 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -520,6 +520,14 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  	dio->submit.waiter = current;
>  	dio->submit.poll_bio = NULL;
>  
> +	/*
> +	 * Transfer the private data that was passed by the caller to the
> +	 * iomap_iter, and clear it in the iocb, as iocb->private will be
> +	 * used for polled bio completion later.
> +	 */
> +	iomi.private = iocb->private;
> +	WRITE_ONCE(iocb->private, NULL);

Do we need to transfer it back after the bio completes?  Or is it a
feature that iocb->private changes to the bio?

--D

> +
>  	if (iov_iter_rw(iter) == READ) {
>  		if (iomi.pos >= dio->i_size)
>  			goto out_free_dio;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index a5483020dad41..109c055865f73 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -188,6 +188,7 @@ struct iomap_iter {
>  	unsigned flags;
>  	struct iomap iomap;
>  	struct iomap srcmap;
> +	void *private;
>  };
>  
>  int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-05 15:41   ` Darrick J. Wong
@ 2022-05-05 15:45     ` Christoph Hellwig
  2022-05-05 16:32       ` Darrick J. Wong
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-05 15:45 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba,
	linux-btrfs, linux-xfs, linux-fsdevel

On Thu, May 05, 2022 at 08:41:26AM -0700, Darrick J. Wong wrote:
> > +	 */
> > +	iomi.private = iocb->private;
> > +	WRITE_ONCE(iocb->private, NULL);
> 
> Do we need to transfer it back after the bio completes?  Or is it a
> feature that iocb->private changes to the bio?

No need to transfer it back.  It ist just a creative way to pass private
data in.  Initially I just added yet another argument to iomap_dio_rw,
and maybe I should just go back to that to make the things easier to
follow.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio
  2022-05-04 16:23 ` [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio Christoph Hellwig
  2022-05-05  8:12   ` Nikolay Borisov
@ 2022-05-05 15:52   ` David Sterba
  1 sibling, 0 replies; 24+ messages in thread
From: David Sterba @ 2022-05-05 15:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong,
	linux-btrfs, linux-xfs, linux-fsdevel

On Wed, May 04, 2022 at 09:23:42AM -0700, Christoph Hellwig wrote:
> -	if (!write) {
> +	if (!write && !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
>  		/*
>  		 * Load the csums up front to reduce csum tree searches and
>  		 * contention when submitting bios.
> -		 *
> -		 * If we have csums disabled this will do nothing.
>  		 */
> +		status = BLK_STS_RESOURCE;
> +		dip->csums = kzalloc(fs_info->csum_size *
> +			(dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits),
> +			GFP_NOFS);

This should be either kmalloc_array or kcalloc, otherwise OK.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: reduce memory allocation in the btrfs direct I/O path
  2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
                   ` (5 preceding siblings ...)
  2022-05-05  8:33 ` reduce memory allocation in the btrfs direct I/O path Nikolay Borisov
@ 2022-05-05 15:55 ` David Sterba
  2022-05-06 17:18   ` Darrick J. Wong
  6 siblings, 1 reply; 24+ messages in thread
From: David Sterba @ 2022-05-05 15:55 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Chris Mason, Josef Bacik, David Sterba, Darrick J. Wong,
	linux-btrfs, linux-xfs, linux-fsdevel

On Wed, May 04, 2022 at 09:23:37AM -0700, Christoph Hellwig wrote:
> Hi all,
> 
> this series adds two minor improvements to iomap that allow btrfs
> to avoid a memory allocation per read/write system call and another
> one per submitted bio.  I also have at last two other pending uses
> for the iomap functionality later on, so they are not really btrfs
> specific either.

The series is reasonably short so I'd like to add it to 5.20 queue,
provided that the iomap patches get acked by Darrick. Any fixups I'd
rather fold into my local branch, no need to resend unless there are
significant updates.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-05 15:45     ` Christoph Hellwig
@ 2022-05-05 16:32       ` Darrick J. Wong
  2022-05-05 18:15         ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: Darrick J. Wong @ 2022-05-05 16:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs, linux-xfs,
	linux-fsdevel

On Thu, May 05, 2022 at 05:45:57PM +0200, Christoph Hellwig wrote:
> On Thu, May 05, 2022 at 08:41:26AM -0700, Darrick J. Wong wrote:
> > > +	 */
> > > +	iomi.private = iocb->private;
> > > +	WRITE_ONCE(iocb->private, NULL);
> > 
> > Do we need to transfer it back after the bio completes?  Or is it a
> > feature that iocb->private changes to the bio?
> 
> No need to transfer it back.  It ist just a creative way to pass private
> data in.  Initially I just added yet another argument to iomap_dio_rw,
> and maybe I should just go back to that to make the things easier to
> follow.

Hmm.  Who owns iocb->private?  AFAICT there are two users of it -- the
directio code uses it to store bios for polling; and then there's ocfs2,
which apparently uses it for iocb lock state(!) flags.

Getting back to iomap, I think the comment before __iomap_dio_rw should
state that iocb->private will be transferred to iter->private to make
that relationship more obvious, in case ocfs2 ever stumbles into iomap
and explodes on impact.

--D

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-05 16:32       ` Darrick J. Wong
@ 2022-05-05 18:15         ` Christoph Hellwig
  2022-05-05 18:18           ` Darrick J. Wong
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-05 18:15 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christoph Hellwig, Chris Mason, Josef Bacik, David Sterba,
	linux-btrfs, linux-xfs, linux-fsdevel

On Thu, May 05, 2022 at 09:32:19AM -0700, Darrick J. Wong wrote:
> > No need to transfer it back.  It ist just a creative way to pass private
> > data in.  Initially I just added yet another argument to iomap_dio_rw,
> > and maybe I should just go back to that to make the things easier to
> > follow.
> 
> Hmm.  Who owns iocb->private?  AFAICT there are two users of it -- the
> directio code uses it to store bios for polling; and then there's ocfs2,
> which apparently uses it for iocb lock state(!) flags.

Yeah.

> Getting back to iomap, I think the comment before __iomap_dio_rw should
> state that iocb->private will be transferred to iter->private to make
> that relationship more obvious, in case ocfs2 ever stumbles into iomap
> and explodes on impact.

I think I'll just look into passing an extra argument instead.  It
is pretty clear that using iocb->private was a little too clever and
takes experienced file system developers way too much time to understand.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/5] iomap: add per-iomap_iter private data
  2022-05-05 18:15         ` Christoph Hellwig
@ 2022-05-05 18:18           ` Darrick J. Wong
  0 siblings, 0 replies; 24+ messages in thread
From: Darrick J. Wong @ 2022-05-05 18:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Chris Mason, Josef Bacik, David Sterba, linux-btrfs, linux-xfs,
	linux-fsdevel

On Thu, May 05, 2022 at 08:15:43PM +0200, Christoph Hellwig wrote:
> On Thu, May 05, 2022 at 09:32:19AM -0700, Darrick J. Wong wrote:
> > > No need to transfer it back.  It ist just a creative way to pass private
> > > data in.  Initially I just added yet another argument to iomap_dio_rw,
> > > and maybe I should just go back to that to make the things easier to
> > > follow.
> > 
> > Hmm.  Who owns iocb->private?  AFAICT there are two users of it -- the
> > directio code uses it to store bios for polling; and then there's ocfs2,
> > which apparently uses it for iocb lock state(!) flags.
> 
> Yeah.
> 
> > Getting back to iomap, I think the comment before __iomap_dio_rw should
> > state that iocb->private will be transferred to iter->private to make
> > that relationship more obvious, in case ocfs2 ever stumbles into iomap
> > and explodes on impact.
> 
> I think I'll just look into passing an extra argument instead.  It
> is pretty clear that using iocb->private was a little too clever and
> takes experienced file system developers way too much time to understand.

Ok.

--D

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: reduce memory allocation in the btrfs direct I/O path
  2022-05-05 15:55 ` David Sterba
@ 2022-05-06 17:18   ` Darrick J. Wong
  2022-05-07  5:26     ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: Darrick J. Wong @ 2022-05-06 17:18 UTC (permalink / raw)
  To: dsterba, Christoph Hellwig, Chris Mason, Josef Bacik,
	David Sterba, linux-btrfs, linux-xfs, linux-fsdevel

On Thu, May 05, 2022 at 05:55:29PM +0200, David Sterba wrote:
> On Wed, May 04, 2022 at 09:23:37AM -0700, Christoph Hellwig wrote:
> > Hi all,
> > 
> > this series adds two minor improvements to iomap that allow btrfs
> > to avoid a memory allocation per read/write system call and another
> > one per submitted bio.  I also have at last two other pending uses
> > for the iomap functionality later on, so they are not really btrfs
> > specific either.
> 
> The series is reasonably short so I'd like to add it to 5.20 queue,
> provided that the iomap patches get acked by Darrick. Any fixups I'd
> rather fold into my local branch, no need to resend unless there are
> significant updates.

Hm.  I'm planning on pushing out a (very late) iomap-5.19-merge branch,
since (AFAICT) these changes are mostly plumbing.  Do you want me to
push the first three patches of this series for 5.19?

--D

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: reduce memory allocation in the btrfs direct I/O path
  2022-05-06 17:18   ` Darrick J. Wong
@ 2022-05-07  5:26     ` Christoph Hellwig
  2022-05-09 18:46       ` David Sterba
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2022-05-07  5:26 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: dsterba, Christoph Hellwig, Chris Mason, Josef Bacik,
	David Sterba, linux-btrfs, linux-xfs, linux-fsdevel

On Fri, May 06, 2022 at 10:18:03AM -0700, Darrick J. Wong wrote:
> > The series is reasonably short so I'd like to add it to 5.20 queue,
> > provided that the iomap patches get acked by Darrick. Any fixups I'd
> > rather fold into my local branch, no need to resend unless there are
> > significant updates.
> 
> Hm.  I'm planning on pushing out a (very late) iomap-5.19-merge branch,
> since (AFAICT) these changes are mostly plumbing.  Do you want me to
> push the first three patches of this series for 5.19?

Given that we have no conflicts it might be easiest to just merge the
whole series through the btrfs tree.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: reduce memory allocation in the btrfs direct I/O path
  2022-05-07  5:26     ` Christoph Hellwig
@ 2022-05-09 18:46       ` David Sterba
  2022-05-10  3:33         ` Darrick J. Wong
  0 siblings, 1 reply; 24+ messages in thread
From: David Sterba @ 2022-05-09 18:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Darrick J. Wong, dsterba, Chris Mason, Josef Bacik, David Sterba,
	linux-btrfs, linux-xfs, linux-fsdevel

On Sat, May 07, 2022 at 07:26:49AM +0200, Christoph Hellwig wrote:
> On Fri, May 06, 2022 at 10:18:03AM -0700, Darrick J. Wong wrote:
> > > The series is reasonably short so I'd like to add it to 5.20 queue,
                                                              ^^^^
Sorry, I meant 5.19, ie. the one that's about to start soon.

> > > provided that the iomap patches get acked by Darrick. Any fixups I'd
> > > rather fold into my local branch, no need to resend unless there are
> > > significant updates.
> > 
> > Hm.  I'm planning on pushing out a (very late) iomap-5.19-merge branch,
> > since (AFAICT) these changes are mostly plumbing.  Do you want me to
> > push the first three patches of this series for 5.19?
> 
> Given that we have no conflicts it might be easiest to just merge the
> whole series through the btrfs tree.

Yeah, I'd rather take it via the btrfs tree.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: reduce memory allocation in the btrfs direct I/O path
  2022-05-09 18:46       ` David Sterba
@ 2022-05-10  3:33         ` Darrick J. Wong
  0 siblings, 0 replies; 24+ messages in thread
From: Darrick J. Wong @ 2022-05-10  3:33 UTC (permalink / raw)
  To: dsterba, Christoph Hellwig, Chris Mason, Josef Bacik,
	David Sterba, linux-btrfs, linux-xfs, linux-fsdevel

On Mon, May 09, 2022 at 08:46:50PM +0200, David Sterba wrote:
> On Sat, May 07, 2022 at 07:26:49AM +0200, Christoph Hellwig wrote:
> > On Fri, May 06, 2022 at 10:18:03AM -0700, Darrick J. Wong wrote:
> > > > The series is reasonably short so I'd like to add it to 5.20 queue,
>                                                               ^^^^
> Sorry, I meant 5.19, ie. the one that's about to start soon.
> 
> > > > provided that the iomap patches get acked by Darrick. Any fixups I'd
> > > > rather fold into my local branch, no need to resend unless there are
> > > > significant updates.
> > > 
> > > Hm.  I'm planning on pushing out a (very late) iomap-5.19-merge branch,
> > > since (AFAICT) these changes are mostly plumbing.  Do you want me to
> > > push the first three patches of this series for 5.19?
> > 
> > Given that we have no conflicts it might be easiest to just merge the
> > whole series through the btrfs tree.
> 
> Yeah, I'd rather take it via the btrfs tree.

Ah, *5.19*.  Yes, this plan now sounds good to me!

(I had wondered, 5.20 seemed like an awfully long time to wait for
something as straightforward as that.)

--D

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2022-05-10  3:36 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-04 16:23 reduce memory allocation in the btrfs direct I/O path Christoph Hellwig
2022-05-04 16:23 ` [PATCH 1/5] iomap: allow the file system to provide a bio_set for direct I/O Christoph Hellwig
2022-05-05 15:38   ` Darrick J. Wong
2022-05-04 16:23 ` [PATCH 2/5] iomap: add per-iomap_iter private data Christoph Hellwig
2022-05-05  8:06   ` Nikolay Borisov
2022-05-05 15:06     ` Christoph Hellwig
2022-05-05 15:41   ` Darrick J. Wong
2022-05-05 15:45     ` Christoph Hellwig
2022-05-05 16:32       ` Darrick J. Wong
2022-05-05 18:15         ` Christoph Hellwig
2022-05-05 18:18           ` Darrick J. Wong
2022-05-04 16:23 ` [PATCH 3/5] btrfs: add a btrfs_dio_rw wrapper Christoph Hellwig
2022-05-04 16:23 ` [PATCH 4/5] btrfs: allocate dio_data on stack Christoph Hellwig
2022-05-04 16:23 ` [PATCH 5/5] btrfs: allocate the btrfs_dio_private as part of the iomap dio bio Christoph Hellwig
2022-05-05  8:12   ` Nikolay Borisov
2022-05-05 15:07     ` Christoph Hellwig
2022-05-05 15:20       ` David Sterba
2022-05-05 15:52   ` David Sterba
2022-05-05  8:33 ` reduce memory allocation in the btrfs direct I/O path Nikolay Borisov
2022-05-05 15:55 ` David Sterba
2022-05-06 17:18   ` Darrick J. Wong
2022-05-07  5:26     ` Christoph Hellwig
2022-05-09 18:46       ` David Sterba
2022-05-10  3:33         ` Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.