Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
* [RFC 0/2] Avoid indirect function calls in iomap
@ 2020-07-28 17:32 Matthew Wilcox (Oracle)
  2020-07-28 17:32 ` [PATCH 1/2] iomap: Add iomap_iter Matthew Wilcox (Oracle)
  2020-07-28 17:32 ` [PATCH 2/2] iomap: Convert readahead to iomap_iter Matthew Wilcox (Oracle)
  0 siblings, 2 replies; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-07-28 17:32 UTC (permalink / raw)
  To: Christoph Hellwig, Darrick J. Wong
  Cc: Matthew Wilcox (Oracle), linux-xfs, linux-fsdevel

This RFC converts indirect function calls into direct function calls.
It converts one user as an example (readahead).  Converting more users of
iomap_apply() would yield a more advantageous diffstat.  It's actually
slightly more code in each filesystem, but indirect function calls
are pretty expensive.  It also flattens the call graph (see the patch
2 changelog).  This all needs more refinement, but I'm about to start
work on overhauling the block size < page size support, and I thought
I'd send this out now.

It does survive a basic xfstests run.

Matthew Wilcox (Oracle) (2):
  iomap: Add iomap_iter
  iomap: Convert readahead to iomap_iter

 fs/iomap/apply.c       | 29 +++++++++++++++
 fs/iomap/buffered-io.c | 82 ++++++++++++++----------------------------
 fs/xfs/xfs_aops.c      |  9 ++++-
 fs/xfs/xfs_iomap.c     | 15 ++++++++
 fs/xfs/xfs_iomap.h     |  2 ++
 fs/zonefs/super.c      | 20 ++++++++++-
 include/linux/iomap.h  | 67 +++++++++++++++++++++++++++++++++-
 7 files changed, 165 insertions(+), 59 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] iomap: Add iomap_iter
  2020-07-28 17:32 [RFC 0/2] Avoid indirect function calls in iomap Matthew Wilcox (Oracle)
@ 2020-07-28 17:32 ` Matthew Wilcox (Oracle)
  2020-08-11 21:01   ` Darrick J. Wong
  2020-07-28 17:32 ` [PATCH 2/2] iomap: Convert readahead to iomap_iter Matthew Wilcox (Oracle)
  1 sibling, 1 reply; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-07-28 17:32 UTC (permalink / raw)
  To: Christoph Hellwig, Darrick J. Wong
  Cc: Matthew Wilcox (Oracle), linux-xfs, linux-fsdevel

The iomap_iter provides a convenient way to package up and maintain
all the arguments to the various mapping and operation functions.
iomi_advance() moves the iterator forward to the next chunk of the file.
This approach has only one callback to the filesystem -- the iomap_next_t
instead of the separate iomap_begin / iomap_end calls.  This slightly
complicates the filesystems, but is more efficient.  The next function
will be called even after an error has occurred to allow the filesystem
the opportunity to clean up.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/apply.c      | 29 ++++++++++++++++++++++
 include/linux/iomap.h | 57 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
index 76925b40b5fd..c83dcd203558 100644
--- a/fs/iomap/apply.c
+++ b/fs/iomap/apply.c
@@ -92,3 +92,32 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 
 	return written ? written : ret;
 }
+
+bool iomi_advance(struct iomap_iter *iomi, int err)
+{
+	struct iomap *iomap = &iomi->iomap;
+
+	if (likely(!err)) {
+		iomi->pos += iomi->copied;
+		iomi->len -= iomi->copied;
+		iomi->ret += iomi->copied;
+		if (!iomi->len)
+			return false;
+		iomi->copied = 0;
+		if (WARN_ON(iomap->offset > iomi->pos))
+			err = -EIO;
+		if (WARN_ON(iomap->offset + iomap->length <= iomi->pos))
+			err = -EIO;
+	}
+
+	if (unlikely(err < 0)) {
+		if (iomi->copied < 0)
+			return false;
+		/* Give the body a chance to see the error and clean up */
+		iomi->copied = err;
+		if (!iomi->ret)
+			iomi->ret = err;
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(iomi_advance);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 4d1d3c3469e9..fe58e68ec0c1 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -142,6 +142,63 @@ struct iomap_ops {
 			ssize_t written, unsigned flags, struct iomap *iomap);
 };
 
+/**
+ * struct iomap_iter - Iterate through a range of a file.
+ * @inode: Set at the start of the iteration and should not change.
+ * @pos: The current file position we are operating on.  It is updated by
+ *	calls to iomap_iter().  Treat as read-only in the body.
+ * @len: The remaining length of the file segment we're operating on.
+ *	It is updated at the same time as @pos.
+ * @ret: What we want our declaring function to return.  It is initialised
+ *	to zero and is the cumulative number of bytes processed so far.
+ *	It is updated at the same time as @pos.
+ * @copied: The number of bytes operated on by the body in the most
+ *	recent iteration.  If no bytes were operated on, it may be set to
+ *	a negative errno.  0 is treated as -EIO.
+ * @flags: Zero or more of the iomap_begin flags above.
+ * @iomap: ...
+ * @srcma:s ...
+ */
+struct iomap_iter {
+	struct inode *inode;
+	loff_t pos;
+	u64 len;
+	loff_t ret;
+	ssize_t copied;
+	unsigned flags;
+	struct iomap iomap;
+	struct iomap srcmap;
+};
+
+#define IOMAP_ITER(name, _inode, _pos, _len, _flags)			\
+	struct iomap_iter name = {					\
+		.inode = _inode,					\
+		.pos = _pos,						\
+		.len = _len,						\
+		.flags = _flags,					\
+	}
+
+typedef int (*iomap_next_t)(const struct iomap_iter *,
+		struct iomap *iomap, struct iomap *srcmap);
+bool iomi_advance(struct iomap_iter *iomi, int err);
+
+static inline bool iomap_iter(struct iomap_iter *iomi, iomap_next_t next)
+{
+	if (iomi->ret && iomi->copied == 0)
+		iomi->copied = -EIO;
+
+	return iomi_advance(iomi, next(iomi, &iomi->iomap, &iomi->srcmap));
+}
+
+static inline u64 iomap_length(const struct iomap_iter *iomi)
+{
+	u64 end = iomi->iomap.offset + iomi->iomap.length;
+
+	if (iomi->srcmap.type != IOMAP_HOLE)
+		end = min(end, iomi->srcmap.offset + iomi->srcmap.length);
+	return min(iomi->len, end - iomi->pos);
+}
+
 /*
  * Main iomap iterator function.
  */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] iomap: Convert readahead to iomap_iter
  2020-07-28 17:32 [RFC 0/2] Avoid indirect function calls in iomap Matthew Wilcox (Oracle)
  2020-07-28 17:32 ` [PATCH 1/2] iomap: Add iomap_iter Matthew Wilcox (Oracle)
@ 2020-07-28 17:32 ` Matthew Wilcox (Oracle)
  2020-08-11 20:56   ` Darrick J. Wong
  1 sibling, 1 reply; 7+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-07-28 17:32 UTC (permalink / raw)
  To: Christoph Hellwig, Darrick J. Wong
  Cc: Matthew Wilcox (Oracle), linux-xfs, linux-fsdevel

This approach removes at least two indirect function calls from the
readahead path.  Previous call chain (indirect function calls marked *):

xfs_vm_readahead
  iomap_readahead
    iomap_apply
      xfs_read_iomap_begin [*]
      iomap_readahead_actor [*]
        iomap_readpage_actor

New call chain:

xfs_vm_readahead
  xfs_iomap_next_read
  iomi_advance
  iomap_readahead
    iomap_readpage_actor

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 82 ++++++++++++++----------------------------
 fs/xfs/xfs_aops.c      |  9 ++++-
 fs/xfs/xfs_iomap.c     | 15 ++++++++
 fs/xfs/xfs_iomap.h     |  2 ++
 fs/zonefs/super.c      | 20 ++++++++++-
 include/linux/iomap.h  | 10 +++++-
 6 files changed, 79 insertions(+), 59 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index bcfc288dba3f..fff23ed6a682 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -206,13 +206,6 @@ iomap_read_end_io(struct bio *bio)
 	bio_put(bio);
 }
 
-struct iomap_readpage_ctx {
-	struct page		*cur_page;
-	bool			cur_page_in_bio;
-	struct bio		*bio;
-	struct readahead_control *rac;
-};
-
 static void
 iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
@@ -369,35 +362,10 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
 }
 EXPORT_SYMBOL_GPL(iomap_readpage);
 
-static loff_t
-iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap, struct iomap *srcmap)
-{
-	struct iomap_readpage_ctx *ctx = data;
-	loff_t done, ret;
-
-	for (done = 0; done < length; done += ret) {
-		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
-			if (!ctx->cur_page_in_bio)
-				unlock_page(ctx->cur_page);
-			put_page(ctx->cur_page);
-			ctx->cur_page = NULL;
-		}
-		if (!ctx->cur_page) {
-			ctx->cur_page = readahead_page(ctx->rac);
-			ctx->cur_page_in_bio = false;
-		}
-		ret = iomap_readpage_actor(inode, pos + done, length - done,
-				ctx, iomap, srcmap);
-	}
-
-	return done;
-}
-
 /**
  * iomap_readahead - Attempt to read pages from a file.
+ * @iomi: The iomap iterator for this operation.
  * @rac: Describes the pages to be read.
- * @ops: The operations vector for the filesystem.
  *
  * This function is for filesystems to call to implement their readahead
  * address_space operation.
@@ -409,35 +377,37 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
  * function is called with memalloc_nofs set, so allocations will not cause
  * the filesystem to be reentered.
  */
-void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
+loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
 {
-	struct inode *inode = rac->mapping->host;
-	loff_t pos = readahead_pos(rac);
-	loff_t length = readahead_length(rac);
-	struct iomap_readpage_ctx ctx = {
-		.rac	= rac,
-	};
-
-	trace_iomap_readahead(inode, readahead_count(rac));
+	loff_t done, ret, length = iomap_length(iomi);
 
-	while (length > 0) {
-		loff_t ret = iomap_apply(inode, pos, length, 0, ops,
-				&ctx, iomap_readahead_actor);
-		if (ret <= 0) {
-			WARN_ON_ONCE(ret == 0);
-			break;
+	for (done = 0; done < length; done += ret) {
+		if (ctx->cur_page && offset_in_page(iomi->pos + done) == 0) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+			ctx->cur_page = NULL;
 		}
-		pos += ret;
-		length -= ret;
+		if (!ctx->cur_page) {
+			ctx->cur_page = readahead_page(ctx->rac);
+			ctx->cur_page_in_bio = false;
+		}
+		ret = iomap_readpage_actor(iomi->inode, iomi->pos + done,
+				length - done, ctx,
+				&iomi->iomap, &iomi->srcmap);
 	}
 
-	if (ctx.bio)
-		submit_bio(ctx.bio);
-	if (ctx.cur_page) {
-		if (!ctx.cur_page_in_bio)
-			unlock_page(ctx.cur_page);
-		put_page(ctx.cur_page);
+	if (iomi->len == done) {
+		if (ctx->bio)
+			submit_bio(ctx->bio);
+		if (ctx->cur_page) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+		}
 	}
+
+	return done;
 }
 EXPORT_SYMBOL_GPL(iomap_readahead);
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index b35611882ff9..2884752e40e8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -625,7 +625,14 @@ STATIC void
 xfs_vm_readahead(
 	struct readahead_control	*rac)
 {
-	iomap_readahead(rac, &xfs_read_iomap_ops);
+	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
+			readahead_length(rac), 0);
+	struct iomap_readpage_ctx ctx = {
+		.rac = rac,
+	};
+
+	while (iomap_iter(&iomi, xfs_iomap_next_read))
+		iomi.copied = iomap_readahead(&iomi, &ctx);
 }
 
 static int
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0e3f62cde375..66f2fcaf136e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1150,6 +1150,21 @@ const struct iomap_ops xfs_read_iomap_ops = {
 	.iomap_begin		= xfs_read_iomap_begin,
 };
 
+int
+xfs_iomap_next_read(
+	const struct iomap_iter *iomi,
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
+{
+	if (iomi->copied < 0)
+		return iomi->copied;
+	if (iomi->copied >= iomi->len)
+		return 0;
+
+	return xfs_read_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
+			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
+}
+
 static int
 xfs_seek_iomap_begin(
 	struct inode		*inode,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7d3703556d0e..1b1fa225e938 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -46,4 +46,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
 extern const struct iomap_ops xfs_seek_iomap_ops;
 extern const struct iomap_ops xfs_xattr_iomap_ops;
 
+int xfs_iomap_next_read(const struct iomap_iter *iomi, struct iomap *iomap,
+		struct iomap *srcmap);
 #endif /* __XFS_IOMAP_H__*/
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 07bc42d62673..4842b85ce36d 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -70,6 +70,17 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	return 0;
 }
 
+static int zonefs_iomap_next(const struct iomap_iter *iomi,
+		struct iomap *iomap, struct iomap *srcmap)
+{
+	if (iomi->copied < 0)
+		return iomi->copied;
+	if (iomi->copied >= iomi->len)
+		return 0;
+	return zonefs_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
+			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
+}
+
 static const struct iomap_ops zonefs_iomap_ops = {
 	.iomap_begin	= zonefs_iomap_begin,
 };
@@ -81,7 +92,14 @@ static int zonefs_readpage(struct file *unused, struct page *page)
 
 static void zonefs_readahead(struct readahead_control *rac)
 {
-	iomap_readahead(rac, &zonefs_iomap_ops);
+	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
+			readahead_length(rac), 0);
+	struct iomap_readpage_ctx ctx = {
+		.rac = rac,
+	};
+
+	while (iomap_iter(&iomi, zonefs_iomap_next))
+		iomi.copied = iomap_readahead(&iomi, &ctx);
 }
 
 /*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index fe58e68ec0c1..dd9bfed85c4f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -212,7 +212,6 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
-void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
 int iomap_set_page_dirty(struct page *page);
 int iomap_is_partially_uptodate(struct page *page, unsigned long from,
 		unsigned long count);
@@ -299,6 +298,15 @@ int iomap_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops);
 
+struct iomap_readpage_ctx {
+	struct page		*cur_page;
+	bool			cur_page_in_bio;
+	struct bio		*bio;
+	struct readahead_control *rac;
+};
+
+loff_t iomap_readahead(struct iomap_iter *, struct iomap_readpage_ctx *);
+
 /*
  * Flags for direct I/O ->end_io:
  */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] iomap: Convert readahead to iomap_iter
  2020-07-28 17:32 ` [PATCH 2/2] iomap: Convert readahead to iomap_iter Matthew Wilcox (Oracle)
@ 2020-08-11 20:56   ` Darrick J. Wong
  2020-08-11 22:31     ` Matthew Wilcox
  0 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2020-08-11 20:56 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: Christoph Hellwig, linux-xfs, linux-fsdevel

On Tue, Jul 28, 2020 at 06:32:15PM +0100, Matthew Wilcox (Oracle) wrote:
> This approach removes at least two indirect function calls from the
> readahead path.  Previous call chain (indirect function calls marked *):
> 
> xfs_vm_readahead
>   iomap_readahead
>     iomap_apply
>       xfs_read_iomap_begin [*]
>       iomap_readahead_actor [*]
>         iomap_readpage_actor
> 
> New call chain:
> 
> xfs_vm_readahead
>   xfs_iomap_next_read
>   iomi_advance
>   iomap_readahead
>     iomap_readpage_actor

I mostly like this, with a few comments...

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 82 ++++++++++++++----------------------------
>  fs/xfs/xfs_aops.c      |  9 ++++-
>  fs/xfs/xfs_iomap.c     | 15 ++++++++
>  fs/xfs/xfs_iomap.h     |  2 ++
>  fs/zonefs/super.c      | 20 ++++++++++-
>  include/linux/iomap.h  | 10 +++++-
>  6 files changed, 79 insertions(+), 59 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index bcfc288dba3f..fff23ed6a682 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -206,13 +206,6 @@ iomap_read_end_io(struct bio *bio)
>  	bio_put(bio);
>  }
>  
> -struct iomap_readpage_ctx {
> -	struct page		*cur_page;
> -	bool			cur_page_in_bio;
> -	struct bio		*bio;
> -	struct readahead_control *rac;
> -};
> -
>  static void
>  iomap_read_inline_data(struct inode *inode, struct page *page,
>  		struct iomap *iomap)
> @@ -369,35 +362,10 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
>  }
>  EXPORT_SYMBOL_GPL(iomap_readpage);
>  
> -static loff_t
> -iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap, struct iomap *srcmap)
> -{
> -	struct iomap_readpage_ctx *ctx = data;
> -	loff_t done, ret;
> -
> -	for (done = 0; done < length; done += ret) {
> -		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
> -			if (!ctx->cur_page_in_bio)
> -				unlock_page(ctx->cur_page);
> -			put_page(ctx->cur_page);
> -			ctx->cur_page = NULL;
> -		}
> -		if (!ctx->cur_page) {
> -			ctx->cur_page = readahead_page(ctx->rac);
> -			ctx->cur_page_in_bio = false;
> -		}
> -		ret = iomap_readpage_actor(inode, pos + done, length - done,
> -				ctx, iomap, srcmap);
> -	}
> -
> -	return done;
> -}
> -
>  /**
>   * iomap_readahead - Attempt to read pages from a file.
> + * @iomi: The iomap iterator for this operation.
>   * @rac: Describes the pages to be read.
> - * @ops: The operations vector for the filesystem.
>   *
>   * This function is for filesystems to call to implement their readahead
>   * address_space operation.
> @@ -409,35 +377,37 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
>   * function is called with memalloc_nofs set, so allocations will not cause
>   * the filesystem to be reentered.
>   */
> -void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
> +loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
>  {
> -	struct inode *inode = rac->mapping->host;
> -	loff_t pos = readahead_pos(rac);
> -	loff_t length = readahead_length(rac);
> -	struct iomap_readpage_ctx ctx = {
> -		.rac	= rac,
> -	};
> -
> -	trace_iomap_readahead(inode, readahead_count(rac));
> +	loff_t done, ret, length = iomap_length(iomi);
>  
> -	while (length > 0) {
> -		loff_t ret = iomap_apply(inode, pos, length, 0, ops,
> -				&ctx, iomap_readahead_actor);
> -		if (ret <= 0) {
> -			WARN_ON_ONCE(ret == 0);
> -			break;
> +	for (done = 0; done < length; done += ret) {
> +		if (ctx->cur_page && offset_in_page(iomi->pos + done) == 0) {
> +			if (!ctx->cur_page_in_bio)
> +				unlock_page(ctx->cur_page);
> +			put_page(ctx->cur_page);
> +			ctx->cur_page = NULL;
>  		}
> -		pos += ret;
> -		length -= ret;
> +		if (!ctx->cur_page) {
> +			ctx->cur_page = readahead_page(ctx->rac);
> +			ctx->cur_page_in_bio = false;
> +		}
> +		ret = iomap_readpage_actor(iomi->inode, iomi->pos + done,
> +				length - done, ctx,
> +				&iomi->iomap, &iomi->srcmap);
>  	}
>  
> -	if (ctx.bio)
> -		submit_bio(ctx.bio);
> -	if (ctx.cur_page) {
> -		if (!ctx.cur_page_in_bio)
> -			unlock_page(ctx.cur_page);
> -		put_page(ctx.cur_page);
> +	if (iomi->len == done) {
> +		if (ctx->bio)
> +			submit_bio(ctx->bio);
> +		if (ctx->cur_page) {
> +			if (!ctx->cur_page_in_bio)
> +				unlock_page(ctx->cur_page);
> +			put_page(ctx->cur_page);
> +		}
>  	}
> +
> +	return done;
>  }
>  EXPORT_SYMBOL_GPL(iomap_readahead);
>  
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index b35611882ff9..2884752e40e8 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -625,7 +625,14 @@ STATIC void
>  xfs_vm_readahead(
>  	struct readahead_control	*rac)
>  {
> -	iomap_readahead(rac, &xfs_read_iomap_ops);
> +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> +			readahead_length(rac), 0);
> +	struct iomap_readpage_ctx ctx = {
> +		.rac = rac,
> +	};
> +
> +	while (iomap_iter(&iomi, xfs_iomap_next_read))
> +		iomi.copied = iomap_readahead(&iomi, &ctx);

Why not have iomap_readahead set iomi.copied on its way out?  The actor
function is supposed to set iomi.ret if an error happens, right?

Oh wait no, the actor function returns a positive copied value, or a
negative error code, and then it's up to the _next_read function to
notice if copied is negative, stuff it in ret, and then return false to
stop the iteration?

>  }
>  
>  static int
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 0e3f62cde375..66f2fcaf136e 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -1150,6 +1150,21 @@ const struct iomap_ops xfs_read_iomap_ops = {
>  	.iomap_begin		= xfs_read_iomap_begin,
>  };
>  
> +int
> +xfs_iomap_next_read(
> +	const struct iomap_iter *iomi,
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)

Aren't these last two parameters already in the iomap iter?
Are they passed separately to work around the pointer being const?

> +{
> +	if (iomi->copied < 0)
> +		return iomi->copied;

Is this boilerplate going to end up in every single iomap_next_t
function?  If so, it should probably just go in iomap_iter prior to the
next() call, right?

I also wonder if these functions (and the typedef) ought to be called
iomap_iter_advance_t since that's what they do -- pick up the status
from the last round, and advance the iterator to the next mapping that
we want to process.

> +	if (iomi->copied >= iomi->len)
> +		return 0;

Er... if we copied more than we asked for, doesn't that imply something
bad just happened?

> +
> +	return xfs_read_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);

Would be kinda nice if you could just pass the whole iomap_iter, but I
get that we're probably stuck with this until the entirety gets
converted.

--D

> +}
> +
>  static int
>  xfs_seek_iomap_begin(
>  	struct inode		*inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 7d3703556d0e..1b1fa225e938 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -46,4 +46,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
>  extern const struct iomap_ops xfs_seek_iomap_ops;
>  extern const struct iomap_ops xfs_xattr_iomap_ops;
>  
> +int xfs_iomap_next_read(const struct iomap_iter *iomi, struct iomap *iomap,
> +		struct iomap *srcmap);
>  #endif /* __XFS_IOMAP_H__*/
> diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
> index 07bc42d62673..4842b85ce36d 100644
> --- a/fs/zonefs/super.c
> +++ b/fs/zonefs/super.c
> @@ -70,6 +70,17 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  	return 0;
>  }
>  
> +static int zonefs_iomap_next(const struct iomap_iter *iomi,
> +		struct iomap *iomap, struct iomap *srcmap)
> +{
> +	if (iomi->copied < 0)
> +		return iomi->copied;
> +	if (iomi->copied >= iomi->len)
> +		return 0;
> +	return zonefs_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
> +}
> +
>  static const struct iomap_ops zonefs_iomap_ops = {
>  	.iomap_begin	= zonefs_iomap_begin,
>  };
> @@ -81,7 +92,14 @@ static int zonefs_readpage(struct file *unused, struct page *page)
>  
>  static void zonefs_readahead(struct readahead_control *rac)
>  {
> -	iomap_readahead(rac, &zonefs_iomap_ops);
> +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> +			readahead_length(rac), 0);
> +	struct iomap_readpage_ctx ctx = {
> +		.rac = rac,
> +	};
> +
> +	while (iomap_iter(&iomi, zonefs_iomap_next))
> +		iomi.copied = iomap_readahead(&iomi, &ctx);
>  }
>  
>  /*
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index fe58e68ec0c1..dd9bfed85c4f 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -212,7 +212,6 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		const struct iomap_ops *ops);
>  int iomap_readpage(struct page *page, const struct iomap_ops *ops);
> -void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
>  int iomap_set_page_dirty(struct page *page);
>  int iomap_is_partially_uptodate(struct page *page, unsigned long from,
>  		unsigned long count);
> @@ -299,6 +298,15 @@ int iomap_writepages(struct address_space *mapping,
>  		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
>  		const struct iomap_writeback_ops *ops);
>  
> +struct iomap_readpage_ctx {
> +	struct page		*cur_page;
> +	bool			cur_page_in_bio;
> +	struct bio		*bio;
> +	struct readahead_control *rac;
> +};
> +
> +loff_t iomap_readahead(struct iomap_iter *, struct iomap_readpage_ctx *);
> +
>  /*
>   * Flags for direct I/O ->end_io:
>   */
> -- 
> 2.27.0
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] iomap: Add iomap_iter
  2020-07-28 17:32 ` [PATCH 1/2] iomap: Add iomap_iter Matthew Wilcox (Oracle)
@ 2020-08-11 21:01   ` Darrick J. Wong
  2020-08-11 21:32     ` Matthew Wilcox
  0 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2020-08-11 21:01 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle); +Cc: Christoph Hellwig, linux-xfs, linux-fsdevel

On Tue, Jul 28, 2020 at 06:32:14PM +0100, Matthew Wilcox (Oracle) wrote:
> The iomap_iter provides a convenient way to package up and maintain
> all the arguments to the various mapping and operation functions.
> iomi_advance() moves the iterator forward to the next chunk of the file.
> This approach has only one callback to the filesystem -- the iomap_next_t
> instead of the separate iomap_begin / iomap_end calls.  This slightly
> complicates the filesystems, but is more efficient.  The next function

How much more efficient?  I've wondered since the start of
spectre/meltdown if in ten years we're still going to appreciate the
increase in code complexity that comes from trying to avoid indirect
function calls.  That said, we needed an iomap iterator long before
that, so I think I mostly like this.

> will be called even after an error has occurred to allow the filesystem
> the opportunity to clean up.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/apply.c      | 29 ++++++++++++++++++++++
>  include/linux/iomap.h | 57 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 86 insertions(+)
> 
> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> index 76925b40b5fd..c83dcd203558 100644
> --- a/fs/iomap/apply.c
> +++ b/fs/iomap/apply.c
> @@ -92,3 +92,32 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  
>  	return written ? written : ret;
>  }
> +
> +bool iomi_advance(struct iomap_iter *iomi, int err)
> +{
> +	struct iomap *iomap = &iomi->iomap;
> +
> +	if (likely(!err)) {
> +		iomi->pos += iomi->copied;
> +		iomi->len -= iomi->copied;
> +		iomi->ret += iomi->copied;
> +		if (!iomi->len)
> +			return false;
> +		iomi->copied = 0;
> +		if (WARN_ON(iomap->offset > iomi->pos))
> +			err = -EIO;
> +		if (WARN_ON(iomap->offset + iomap->length <= iomi->pos))
> +			err = -EIO;
> +	}
> +
> +	if (unlikely(err < 0)) {
> +		if (iomi->copied < 0)
> +			return false;
> +		/* Give the body a chance to see the error and clean up */
> +		iomi->copied = err;
> +		if (!iomi->ret)
> +			iomi->ret = err;
> +	}
> +	return true;
> +}
> +EXPORT_SYMBOL_GPL(iomi_advance);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 4d1d3c3469e9..fe58e68ec0c1 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -142,6 +142,63 @@ struct iomap_ops {
>  			ssize_t written, unsigned flags, struct iomap *iomap);
>  };
>  
> +/**
> + * struct iomap_iter - Iterate through a range of a file.
> + * @inode: Set at the start of the iteration and should not change.
> + * @pos: The current file position we are operating on.  It is updated by
> + *	calls to iomap_iter().  Treat as read-only in the body.
> + * @len: The remaining length of the file segment we're operating on.
> + *	It is updated at the same time as @pos.
> + * @ret: What we want our declaring function to return.  It is initialised
> + *	to zero and is the cumulative number of bytes processed so far.
> + *	It is updated at the same time as @pos.
> + * @copied: The number of bytes operated on by the body in the most
> + *	recent iteration.  If no bytes were operated on, it may be set to
> + *	a negative errno.  0 is treated as -EIO.
> + * @flags: Zero or more of the iomap_begin flags above.
> + * @iomap: ...
> + * @srcma:s ...

...? :)

> + */
> +struct iomap_iter {
> +	struct inode *inode;
> +	loff_t pos;
> +	u64 len;

Thanks for leaving this u64 :)

> +	loff_t ret;
> +	ssize_t copied;

Is this going to be sufficient for SEEK_{HOLE,DATA} when it wants to
jump ahead by more than 2GB?

> +	unsigned flags;
> +	struct iomap iomap;
> +	struct iomap srcmap;
> +};
> +
> +#define IOMAP_ITER(name, _inode, _pos, _len, _flags)			\
> +	struct iomap_iter name = {					\
> +		.inode = _inode,					\
> +		.pos = _pos,						\
> +		.len = _len,						\
> +		.flags = _flags,					\
> +	}
> +
> +typedef int (*iomap_next_t)(const struct iomap_iter *,
> +		struct iomap *iomap, struct iomap *srcmap);

I muttered in my other reply how these probably should be called
iomap_iter_advance_t since they actually do the upper level work of
advancing the iterator to the next mapping.

--D

> +bool iomi_advance(struct iomap_iter *iomi, int err);
> +
> +static inline bool iomap_iter(struct iomap_iter *iomi, iomap_next_t next)
> +{
> +	if (iomi->ret && iomi->copied == 0)
> +		iomi->copied = -EIO;
> +
> +	return iomi_advance(iomi, next(iomi, &iomi->iomap, &iomi->srcmap));
> +}
> +
> +static inline u64 iomap_length(const struct iomap_iter *iomi)
> +{
> +	u64 end = iomi->iomap.offset + iomi->iomap.length;
> +
> +	if (iomi->srcmap.type != IOMAP_HOLE)
> +		end = min(end, iomi->srcmap.offset + iomi->srcmap.length);
> +	return min(iomi->len, end - iomi->pos);
> +}
> +
>  /*
>   * Main iomap iterator function.
>   */
> -- 
> 2.27.0
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] iomap: Add iomap_iter
  2020-08-11 21:01   ` Darrick J. Wong
@ 2020-08-11 21:32     ` Matthew Wilcox
  0 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2020-08-11 21:32 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs, linux-fsdevel

On Tue, Aug 11, 2020 at 02:01:27PM -0700, Darrick J. Wong wrote:
> On Tue, Jul 28, 2020 at 06:32:14PM +0100, Matthew Wilcox (Oracle) wrote:
> > The iomap_iter provides a convenient way to package up and maintain
> > all the arguments to the various mapping and operation functions.
> > iomi_advance() moves the iterator forward to the next chunk of the file.
> > This approach has only one callback to the filesystem -- the iomap_next_t
> > instead of the separate iomap_begin / iomap_end calls.  This slightly
> > complicates the filesystems, but is more efficient.  The next function
> 
> How much more efficient?  I've wondered since the start of

I haven't done any performance measurements.  Not entirely sure what
I'd do to measure it, to be honest.  I suppose I should also note the
decreased stack depth here, although I do mention that in the next patch.

> > +/**
> > + * struct iomap_iter - Iterate through a range of a file.
> > + * @inode: Set at the start of the iteration and should not change.
> > + * @pos: The current file position we are operating on.  It is updated by
> > + *	calls to iomap_iter().  Treat as read-only in the body.
> > + * @len: The remaining length of the file segment we're operating on.
> > + *	It is updated at the same time as @pos.
> > + * @ret: What we want our declaring function to return.  It is initialised
> > + *	to zero and is the cumulative number of bytes processed so far.
> > + *	It is updated at the same time as @pos.
> > + * @copied: The number of bytes operated on by the body in the most
> > + *	recent iteration.  If no bytes were operated on, it may be set to
> > + *	a negative errno.  0 is treated as -EIO.
> > + * @flags: Zero or more of the iomap_begin flags above.
> > + * @iomap: ...
> > + * @srcma:s ...
> 
> ...? :)

I ran out of tuits.  If this approach makes people happy, I can finish it
off.

> > + */
> > +struct iomap_iter {
> > +	struct inode *inode;
> > +	loff_t pos;
> > +	u64 len;
> 
> Thanks for leaving this u64 :)
> 
> > +	loff_t ret;
> > +	ssize_t copied;
> 
> Is this going to be sufficient for SEEK_{HOLE,DATA} when it wants to
> jump ahead by more than 2GB?

That's a good point.  loff_t, I guess.  Even though the current users
of iomap don't support extents larger than 2GB ;-)

> > +	unsigned flags;
> > +	struct iomap iomap;
> > +	struct iomap srcmap;
> > +};
> > +
> > +#define IOMAP_ITER(name, _inode, _pos, _len, _flags)			\
> > +	struct iomap_iter name = {					\
> > +		.inode = _inode,					\
> > +		.pos = _pos,						\
> > +		.len = _len,						\
> > +		.flags = _flags,					\
> > +	}
> > +
> > +typedef int (*iomap_next_t)(const struct iomap_iter *,
> > +		struct iomap *iomap, struct iomap *srcmap);
> 
> I muttered in my other reply how these probably should be called
> iomap_iter_advance_t since they actually do the upper level work of
> advancing the iterator to the next mapping.

nono, the iterator is const!  They don't move the iterator at all,
they just get the next iomap/srcmap.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] iomap: Convert readahead to iomap_iter
  2020-08-11 20:56   ` Darrick J. Wong
@ 2020-08-11 22:31     ` Matthew Wilcox
  0 siblings, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2020-08-11 22:31 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs, linux-fsdevel

On Tue, Aug 11, 2020 at 01:56:13PM -0700, Darrick J. Wong wrote:
> > @@ -625,7 +625,14 @@ STATIC void
> >  xfs_vm_readahead(
> >  	struct readahead_control	*rac)
> >  {
> > -	iomap_readahead(rac, &xfs_read_iomap_ops);
> > +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> > +			readahead_length(rac), 0);
> > +	struct iomap_readpage_ctx ctx = {
> > +		.rac = rac,
> > +	};
> > +
> > +	while (iomap_iter(&iomi, xfs_iomap_next_read))
> > +		iomi.copied = iomap_readahead(&iomi, &ctx);
> 
> Why not have iomap_readahead set iomi.copied on its way out?  The actor
> function is supposed to set iomi.ret if an error happens, right?

I actually wanted to make iomap_readahead take a const pointer.
This should do the trick.

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index fff23ed6a682..3ca128a3b044 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -377,7 +377,8 @@ EXPORT_SYMBOL_GPL(iomap_readpage);
  * function is called with memalloc_nofs set, so allocations will not cause
  * the filesystem to be reentered.
  */
-loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
+loff_t iomap_readahead(const struct iomap_iter *iomi,
+		struct iomap *iomap, struct iomap_readpage_ctx *ctx)
 {
 	loff_t done, ret, length = iomap_length(iomi);
 
@@ -393,8 +394,7 @@ loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
 			ctx->cur_page_in_bio = false;
 		}
 		ret = iomap_readpage_actor(iomi->inode, iomi->pos + done,
-				length - done, ctx,
-				&iomi->iomap, &iomi->srcmap);
+				length - done, ctx, iomap, NULL);
 	}
 
 	if (iomi->len == done) {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 2884752e40e8..62777daefe94 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -632,7 +632,7 @@ xfs_vm_readahead(
 	};
 
 	while (iomap_iter(&iomi, xfs_iomap_next_read))
-		iomi.copied = iomap_readahead(&iomi, &ctx);
+		iomi.copied = iomap_readahead(&iomi, &iomi.iomap, &ctx);
 }
 
 static int
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 4842b85ce36d..6ae51bf1d77c 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -99,7 +99,7 @@ static void zonefs_readahead(struct readahead_control *rac)
 	};
 
 	while (iomap_iter(&iomi, zonefs_iomap_next))
-		iomi.copied = iomap_readahead(&iomi, &ctx);
+		iomi.copied = iomap_readahead(&iomi, &iomi.iomap, &ctx);
 }
 
 /*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index dd9bfed85c4f..11a104129a04 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -305,7 +305,8 @@ struct iomap_readpage_ctx {
 	struct readahead_control *rac;
 };
 
-loff_t iomap_readahead(struct iomap_iter *, struct iomap_readpage_ctx *);
+loff_t iomap_readahead(const struct iomap_iter *, struct iomap *,
+		struct iomap_readpage_ctx *);
 
 /*
  * Flags for direct I/O ->end_io:

> Oh wait no, the actor function returns a positive copied value, or a
> negative error code, and then it's up to the _next_read function to
> notice if copied is negative, stuff it in ret, and then return false to
> stop the iteration?

I want to handle all the changes to iomap_iter in iomap_iter() and
iomi_advance() so people writing new things that use iomap_iter don't
need to think about what they should modify.  Just return the error;
done.

One of the more convoluted bits of this is making sure that both the
filesystem and the body of the loop get the chance to clean up their state
if the other encounters an error.  So if 'copied' is set to an errno by
the body, then we call next() anyway (and stop the iteration).  And if
next() returns an error, we iterate the body once more.  We'll still
call next() again even if it did return an error, because it might not
have realised that returning a completely bogus iomap was an error.

> > +int
> > +xfs_iomap_next_read(
> > +	const struct iomap_iter *iomi,
> > +	struct iomap		*iomap,
> > +	struct iomap		*srcmap)
> 
> Aren't these last two parameters already in the iomap iter?
> Are they passed separately to work around the pointer being const?

Exactly.

> > +{
> > +	if (iomi->copied < 0)
> > +		return iomi->copied;
> 
> Is this boilerplate going to end up in every single iomap_next_t
> function?  If so, it should probably just go in iomap_iter prior to the
> next() call, right?

This is to give the next_t the opportunity to clean up after itself.
ie it's for the things currently done in ->iomap_end().  So when we
replace xfs_buffered_write_iomap_ops, you'll see it used then.

> > +	if (iomi->copied >= iomi->len)
> > +		return 0;
> 
> Er... if we copied more than we asked for, doesn't that imply something
> bad just happened?

erm ... maybe?  We don't currently sanity-check the return value from
actor() in iomap_apply().  Perhaps we should?

> > +
> > +	return xfs_read_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> > +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
> 
> Would be kinda nice if you could just pass the whole iomap_iter, but I
> get that we're probably stuck with this until the entirety gets
> converted.

Yeah.  I could probably do it the other way round where
xfs_read_iomap_begin() constructs an iomap_iter on the stack and passes
it to xfs_read_iomap_begin().  I don't think it makes much difference.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-28 17:32 [RFC 0/2] Avoid indirect function calls in iomap Matthew Wilcox (Oracle)
2020-07-28 17:32 ` [PATCH 1/2] iomap: Add iomap_iter Matthew Wilcox (Oracle)
2020-08-11 21:01   ` Darrick J. Wong
2020-08-11 21:32     ` Matthew Wilcox
2020-07-28 17:32 ` [PATCH 2/2] iomap: Convert readahead to iomap_iter Matthew Wilcox (Oracle)
2020-08-11 20:56   ` Darrick J. Wong
2020-08-11 22:31     ` Matthew Wilcox

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git