* [RFC] iomap: Remove indirect function call
@ 2020-03-28 15:51 Matthew Wilcox
2020-03-31 7:46 ` Christoph Hellwig
0 siblings, 1 reply; 3+ messages in thread
From: Matthew Wilcox @ 2020-03-28 15:51 UTC (permalink / raw)
To: linux-fsdevel; +Cc: Christoph Hellwig
By splitting iomap_apply into __iomap_apply and an inline iomap_apply,
we convert the call to 'actor' into a direct function call. I haven't
done any performance measurements, but given the current costs of indirect
function calls, this would seem worthwhile to me?
diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
index 76925b40b5fd..c747cbf66a3d 100644
--- a/fs/iomap/apply.c
+++ b/fs/iomap/apply.c
@@ -9,24 +9,11 @@
#include <linux/iomap.h>
#include "trace.h"
-/*
- * Execute a iomap write on a segment of the mapping that spans a
- * contiguous range of pages that have identical block mapping state.
- *
- * This avoids the need to map pages individually, do individual allocations
- * for each page and most importantly avoid the need for filesystem specific
- * locking per page. Instead, all the operations are amortised over the entire
- * range of pages. It is assumed that the filesystems will lock whatever
- * resources they require in the iomap_begin call, and release them in the
- * iomap_end call.
- */
-loff_t
-iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
- const struct iomap_ops *ops, void *data, iomap_actor_t actor)
+loff_t __iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap, struct iomap *srcmap,
+ const struct iomap_ops *ops, iomap_actor_t actor)
{
- struct iomap iomap = { .type = IOMAP_HOLE };
- struct iomap srcmap = { .type = IOMAP_HOLE };
- loff_t written = 0, ret;
+ loff_t ret;
u64 end;
trace_iomap_apply(inode, pos, length, flags, ops, actor, _RET_IP_);
@@ -43,52 +30,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
* expose transient stale data. If the reserve fails, we can safely
* back out at this point as there is nothing to undo.
*/
- ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
+ ret = ops->iomap_begin(inode, pos, length, flags, iomap, srcmap);
if (ret)
return ret;
- if (WARN_ON(iomap.offset > pos))
+ if (WARN_ON(iomap->offset > pos))
return -EIO;
- if (WARN_ON(iomap.length == 0))
+ if (WARN_ON(iomap->length == 0))
return -EIO;
- trace_iomap_apply_dstmap(inode, &iomap);
- if (srcmap.type != IOMAP_HOLE)
- trace_iomap_apply_srcmap(inode, &srcmap);
+ trace_iomap_apply_dstmap(inode, iomap);
+ if (srcmap->type != IOMAP_HOLE)
+ trace_iomap_apply_srcmap(inode, srcmap);
/*
* Cut down the length to the one actually provided by the filesystem,
* as it might not be able to give us the whole size that we requested.
*/
- end = iomap.offset + iomap.length;
- if (srcmap.type != IOMAP_HOLE)
- end = min(end, srcmap.offset + srcmap.length);
+ end = iomap->offset + iomap->length;
+ if (srcmap->type != IOMAP_HOLE)
+ end = min(end, srcmap->offset + srcmap->length);
if (pos + length > end)
length = end - pos;
-
- /*
- * Now that we have guaranteed that the space allocation will succeed,
- * we can do the copy-in page by page without having to worry about
- * failures exposing transient data.
- *
- * To support COW operations, we read in data for partially blocks from
- * the srcmap if the file system filled it in. In that case we the
- * length needs to be limited to the earlier of the ends of the iomaps.
- * If the file system did not provide a srcmap we pass in the normal
- * iomap into the actors so that they don't need to have special
- * handling for the two cases.
- */
- written = actor(inode, pos, length, data, &iomap,
- srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
-
- /*
- * Now the data has been copied, commit the range we've copied. This
- * should not fail unless the filesystem has had a fatal error.
- */
- if (ops->iomap_end) {
- ret = ops->iomap_end(inode, pos, length,
- written > 0 ? written : 0,
- flags, &iomap);
- }
-
- return written ? written : ret;
+ return length;
}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 8b09463dae0d..31e82e4d30f8 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -148,9 +148,62 @@ struct iomap_ops {
typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
void *data, struct iomap *iomap, struct iomap *srcmap);
-loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
- unsigned flags, const struct iomap_ops *ops, void *data,
- iomap_actor_t actor);
+loff_t __iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap, struct iomap *srcmap,
+ const struct iomap_ops *ops, iomap_actor_t actor);
+
+/*
+ * Execute a iomap write on a segment of the mapping that spans a
+ * contiguous range of pages that have identical block mapping state.
+ *
+ * This avoids the need to map pages individually, do individual allocations
+ * for each page and most importantly avoid the need for filesystem specific
+ * locking per page. Instead, all the operations are amortised over the entire
+ * range of pages. It is assumed that the filesystems will lock whatever
+ * resources they require in the iomap_begin call, and release them in the
+ * iomap_end call.
+ */
+static inline loff_t
+iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
+ const struct iomap_ops *ops, void *data, iomap_actor_t actor)
+{
+ struct iomap iomap = { .type = IOMAP_HOLE };
+ struct iomap srcmap = { .type = IOMAP_HOLE };
+ loff_t written;
+
+ length = __iomap_apply(inode, pos, length, flags, &iomap, &srcmap,
+ ops, actor);
+ /*
+ * Now that we have guaranteed that the space allocation will succeed,
+ * we can do the copy-in page by page without having to worry about
+ * failures exposing transient data.
+ *
+ * To support COW operations, we read in data for partially blocks from
+ * the srcmap if the file system filled it in. In that case we the
+ * length needs to be limited to the earlier of the ends of the iomaps.
+ * If the file system did not provide a srcmap we pass in the normal
+ * iomap into the actors so that they don't need to have special
+ * handling for the two cases.
+ */
+ if (length < 0)
+ return length;
+
+ written = actor(inode, pos, length, data, &iomap,
+ srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
+ /*
+ * Now the data has been copied, commit the range we've copied. This
+ * should not fail unless the filesystem has had a fatal error.
+ */
+ if (ops->iomap_end) {
+ loff_t ret = ops->iomap_end(inode, pos, length,
+ written > 0 ? written : 0,
+ flags, &iomap);
+ if (!written)
+ written = ret;
+ }
+
+ return written;
+}
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [RFC] iomap: Remove indirect function call
2020-03-28 15:51 [RFC] iomap: Remove indirect function call Matthew Wilcox
@ 2020-03-31 7:46 ` Christoph Hellwig
2020-03-31 13:02 ` Matthew Wilcox
0 siblings, 1 reply; 3+ messages in thread
From: Christoph Hellwig @ 2020-03-31 7:46 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-fsdevel, Christoph Hellwig
On Sat, Mar 28, 2020 at 08:51:56AM -0700, Matthew Wilcox wrote:
> By splitting iomap_apply into __iomap_apply and an inline iomap_apply,
> we convert the call to 'actor' into a direct function call. I haven't
> done any performance measurements, but given the current costs of indirect
> function calls, this would seem worthwhile to me?
Hmm. Given that emount of compiler stupidity we are dealing with did
you at least look at the assembly output to see if this actually removes
the indirect call? I wouldn't be quite sure.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC] iomap: Remove indirect function call
2020-03-31 7:46 ` Christoph Hellwig
@ 2020-03-31 13:02 ` Matthew Wilcox
0 siblings, 0 replies; 3+ messages in thread
From: Matthew Wilcox @ 2020-03-31 13:02 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel
On Tue, Mar 31, 2020 at 12:46:28AM -0700, Christoph Hellwig wrote:
> On Sat, Mar 28, 2020 at 08:51:56AM -0700, Matthew Wilcox wrote:
> > By splitting iomap_apply into __iomap_apply and an inline iomap_apply,
> > we convert the call to 'actor' into a direct function call. I haven't
> > done any performance measurements, but given the current costs of indirect
> > function calls, this would seem worthwhile to me?
>
> Hmm. Given that emount of compiler stupidity we are dealing with did
> you at least look at the assembly output to see if this actually removes
> the indirect call? I wouldn't be quite sure.
If it does get inlined, then the compiler does it:
b9d: e8 ae fe ff ff callq a50 <iomap_page_mkwrite_actor>
If not, then the compiler emits a function in each file called iomap_apply
which makes an indirect call. So s/inline/__always_inline/ in the original
patch.
before:
text data bss dec hex filename
5314 4648 0 9962 26ea fs/iomap/trace.o
1050 72 0 1122 462 fs/iomap/apply.o
17316 636 224 18176 4700 fs/iomap/buffered-io.o
4773 76 0 4849 12f1 fs/iomap/direct-io.o
1335 28 0 1363 553 fs/iomap/fiemap.o
1928 28 0 1956 7a4 fs/iomap/seek.o
1394 8 0 1402 57a fs/iomap/swapfile.o
after:
text data bss dec hex filename
5314 4648 0 9962 26ea fs/iomap/trace.o
722 72 0 794 31a fs/iomap/apply.o
18784 636 224 19644 4cbc fs/iomap/buffered-io.o
5169 76 0 5245 147d fs/iomap/direct-io.o
2093 28 0 2121 849 fs/iomap/fiemap.o
2467 28 0 2495 9bf fs/iomap/seek.o
1664 8 0 1672 688 fs/iomap/swapfile.o
33110 to 36213 bytes of text. So not free. Worthwhile?
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-03-31 15:53 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-28 15:51 [RFC] iomap: Remove indirect function call Matthew Wilcox
2020-03-31 7:46 ` Christoph Hellwig
2020-03-31 13:02 ` Matthew Wilcox
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).