linux-erofs.lists.ozlabs.org archive mirror
* [PATCH v7] iomap: make inline data support more flexible
@ 2021-07-23 17:41 Gao Xiang
  2021-07-23 19:40 ` Matthew Wilcox
                   ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Gao Xiang @ 2021-07-23 17:41 UTC (permalink / raw)
  To: linux-erofs, linux-fsdevel
  Cc: Darrick J. Wong, Andreas Gruenbacher, LKML, Matthew Wilcox,
	Christoph Hellwig

Add support for reading inline data content into the page cache from
nonzero page-aligned file offsets.  This enables the EROFS tailpacking
mode where the last few bytes of the file are stored right after the
inode.

The buffered write path remains untouched since EROFS is read-only and
cannot be used to exercise it.  It is better implemented once real
users show up and provide an actual usage pattern, rather than leaving
untested dead code around.
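
For illustration, a filesystem adopting this could describe a
tail-packed file in its ->iomap_begin roughly as below.  This is a
hypothetical sketch, not code from this patch or from EROFS; tail_pos,
tail_len and tail_addr are made-up names for the page-aligned file
offset of the tail, its length, and its address in the buffer that
holds the on-disk inode:

	/*
	 * Hypothetical ->iomap_begin fragment: map everything from the
	 * page-aligned offset tail_pos onwards as inline data living
	 * at tail_addr, right after the inode.
	 */
	if (pos >= tail_pos) {
		iomap->type = IOMAP_INLINE;
		iomap->offset = tail_pos;
		iomap->length = tail_len;
		iomap->inline_data = tail_addr;
		return 0;
	}

With such a mapping, iomap_inline_buf(iomap, pos) below resolves to
tail_addr + (pos - tail_pos), and iomap_inline_data_size_valid()
checks that tail_len does not run past the page backing tail_addr.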

Cc: Christoph Hellwig <hch@lst.de>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
Tested-by: Huang Jianan <huangjianan@oppo.com> # erofs
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
v6: https://lore.kernel.org/r/20210722031729.51628-1-hsiangkao@linux.alibaba.com
changes since v6:
 - reworked based on Christoph's reply;
 - updated the commit message as suggested by Darrick;
 - disabled the buffered write path until real fs users show up.

 fs/iomap/buffered-io.c | 42 ++++++++++++++++++++++++++----------------
 fs/iomap/direct-io.c   | 10 ++++++----
 include/linux/iomap.h  | 14 ++++++++++++++
 3 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 87ccb3438bec..f351e1f9e3f6 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -205,25 +205,29 @@ struct iomap_readpage_ctx {
 	struct readahead_control *rac;
 };
 
-static void
-iomap_read_inline_data(struct inode *inode, struct page *page,
-		struct iomap *iomap)
+static int iomap_read_inline_data(struct inode *inode, struct page *page,
+		struct iomap *iomap, loff_t pos)
 {
-	size_t size = i_size_read(inode);
+	size_t size = iomap->length + iomap->offset - pos;
 	void *addr;
 
 	if (PageUptodate(page))
-		return;
+		return PAGE_SIZE;
 
-	BUG_ON(page_has_private(page));
-	BUG_ON(page->index);
-	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/* inline data must start page aligned in the file */
+	if (WARN_ON_ONCE(offset_in_page(pos)))
+		return -EIO;
+	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
+		return -EIO;
+	if (WARN_ON_ONCE(page_has_private(page)))
+		return -EIO;
 
 	addr = kmap_atomic(page);
-	memcpy(addr, iomap->inline_data, size);
+	memcpy(addr, iomap_inline_buf(iomap, pos), size);
 	memset(addr + size, 0, PAGE_SIZE - size);
 	kunmap_atomic(addr);
 	SetPageUptodate(page);
+	return PAGE_SIZE;
 }
 
 static inline bool iomap_block_needs_zeroing(struct inode *inode,
@@ -246,11 +250,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	unsigned poff, plen;
 	sector_t sector;
 
-	if (iomap->type == IOMAP_INLINE) {
-		WARN_ON_ONCE(pos);
-		iomap_read_inline_data(inode, page, iomap);
-		return PAGE_SIZE;
-	}
+	if (iomap->type == IOMAP_INLINE)
+		return iomap_read_inline_data(inode, page, iomap, pos);
 
 	/* zero post-eof blocks as the page may be mapped */
 	iop = iomap_page_create(inode, page);
@@ -589,6 +590,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 	return 0;
 }
 
+static int iomap_write_begin_inline(struct inode *inode,
+		struct page *page, struct iomap *srcmap)
+{
+	/* needs more work for the tailpacking case, disable for now */
+	if (WARN_ON_ONCE(srcmap->offset != 0))
+		return -EIO;
+	return iomap_read_inline_data(inode, page, srcmap, 0);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
@@ -618,14 +628,14 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	}
 
 	if (srcmap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, srcmap);
+		status = iomap_write_begin_inline(inode, page, srcmap);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
 		status = __iomap_write_begin(inode, pos, len, flags, page,
 				srcmap);
 
-	if (unlikely(status))
+	if (unlikely(status < 0))
 		goto out_unlock;
 
 	*pagep = page;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9398b8c31323..a6aaea2764a5 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 		struct iomap_dio *dio, struct iomap *iomap)
 {
 	struct iov_iter *iter = dio->submit.iter;
+	void *dst = iomap_inline_buf(iomap, pos);
 	size_t copied;
 
-	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
+		return -EIO;
 
 	if (dio->flags & IOMAP_DIO_WRITE) {
 		loff_t size = inode->i_size;
 
 		if (pos > size)
-			memset(iomap->inline_data + size, 0, pos - size);
-		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+			memset(iomap_inline_buf(iomap, size), 0, pos - size);
+		copied = copy_from_iter(dst, length, iter);
 		if (copied) {
 			if (pos + copied > size)
 				i_size_write(inode, pos + copied);
 			mark_inode_dirty(inode);
 		}
 	} else {
-		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+		copied = copy_to_iter(dst, length, iter);
 	}
 	dio->size += copied;
 	return copied;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 479c1da3e221..56b118c6d05c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -97,6 +97,20 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static inline void *iomap_inline_buf(const struct iomap *iomap, loff_t pos)
+{
+	return iomap->inline_data - iomap->offset + pos;
+}
+
+/*
+ * iomap->inline_data is a potentially kmapped page, ensure it never crosses a
+ * page boundary.
+ */
+static inline bool iomap_inline_data_size_valid(const struct iomap *iomap)
+{
+	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
+}
+
 /*
  * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
  * and page_done will be called for each page written to.  This only applies to
-- 
2.24.4


* Re: [PATCH v7] iomap: make inline data support more flexible
@ 2021-08-01 10:29 Andreas Gruenbacher
  0 siblings, 0 replies; 31+ messages in thread
From: Andreas Gruenbacher @ 2021-08-01 10:29 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andreas Gruenbacher, Darrick J. Wong, linux-kernel,
	linux-fsdevel, linux-erofs, Christoph Hellwig

On Mon, Jul 26, 2021 at 2:33 PM Matthew Wilcox <willy@infradead.org> wrote:
> Only tangentially related ... why do we memcpy the data into the tail
> at write_end() time instead of at writepage() time?  I see there's a
> workaround for that in gfs2's page_mkwrite():
>
>         if (gfs2_is_stuffed(ip)) {
>                 err = gfs2_unstuff_dinode(ip);
>
> (an mmap store cannot change the size of the file, so this would be
> unnecessary)
>
> Something like this ...

We can't just bail out after iomap_write_inline_data in
iomap_writepage_map; the page also needs to be unlocked.  Also, we want
to dirty the inode after copying out the inline data and unlocking the
page to make sure the inode gets written out.

Not sure if this can be further simplified.
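
For context, the flow this ends up with in iomap_writepage_map is
roughly the following (a simplified sketch of the patch below, eliding
the error and redirty paths; count and dirty_inode are locals of that
function):

	if (wpc->iomap.type == IOMAP_INLINE) {
		/* copy the whole page into the inline area; no ioend */
		error = iomap_write_inline_data(inode, page, &wpc->iomap);
		if (!error)
			dirty_inode = true;
	}
	...
	unlock_page(page);
	if (!count)		/* no ioend added, writeback is done */
		end_page_writeback(page);
	if (dirty_inode)	/* dirty only after unlocking the page */
		mark_inode_dirty(inode);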

Tested on gfs2 on top of:

 [PATCH v9] iomap: Support file tail packing [1]
 [PATCH v2] iomap: Support inline data with block size < page size [2]
 [PATCH] gfs2: iomap inline data handling cleanup [3]

[1] https://lore.kernel.org/linux-fsdevel/20210727025956.80684-1-hsiangkao@linux.alibaba.com/
[2] https://lore.kernel.org/linux-fsdevel/20210729032344.3975412-1-willy@infradead.org/
[3] https://listman.redhat.com/archives/cluster-devel/2021-July/msg00244.html

Thanks,
Andreas

---
 fs/gfs2/bmap.c         |  3 ---
 fs/gfs2/file.c         |  9 ---------
 fs/iomap/buffered-io.c | 29 +++++++++++++++++++----------
 3 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 84ad0fe787ea..4cea16d6a3fa 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2527,9 +2527,6 @@ static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
 {
 	int ret;
 
-	if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
-		return -EIO;
-
 	if (offset >= wpc->iomap.offset &&
 	    offset < wpc->iomap.offset + wpc->iomap.length)
 		return 0;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 84ec053d43b4..ce8f5eb66db7 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -510,15 +510,6 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 		goto out_trans_fail;
 	}
 
-	/* Unstuff, if required, and allocate backing blocks for page */
-	if (gfs2_is_stuffed(ip)) {
-		err = gfs2_unstuff_dinode(ip);
-		if (err) {
-			ret = block_page_mkwrite_return(err);
-			goto out_trans_end;
-		}
-	}
-
 	lock_page(page);
 	/* If truncated, we must retry the operation, we may have raced
 	 * with the glock demotion code.
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 77d4fe5c1327..a1eb876a9445 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -683,21 +683,23 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	return copied;
 }
 
-static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
-		struct iomap *iomap, loff_t pos, size_t copied)
+static int iomap_write_inline_data(struct inode *inode, struct page *page,
+		struct iomap *iomap)
 {
+	size_t size = i_size_read(inode) - page_offset(page);
 	void *addr;
 
 	WARN_ON_ONCE(!PageUptodate(page));
 	BUG_ON(!iomap_inline_data_valid(iomap));
+	if (WARN_ON_ONCE(size > iomap->length))
+		return -EIO;
 
 	flush_dcache_page(page);
 	addr = kmap_atomic(page);
-	memcpy(iomap_inline_data(iomap, pos), addr + pos, copied);
+	memcpy(iomap->inline_data, addr, size);
 	kunmap_atomic(addr);
 
-	mark_inode_dirty(inode);
-	return copied;
+	return 0;
 }
 
 /* Returns the number of bytes copied.  May be 0.  Cannot be an errno. */
@@ -709,9 +711,7 @@ static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	loff_t old_size = inode->i_size;
 	size_t ret;
 
-	if (srcmap->type == IOMAP_INLINE) {
-		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
+	if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
 				page, NULL);
 	} else {
@@ -1329,6 +1329,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	u64 file_offset; /* file offset of page */
 	int error = 0, count = 0, i;
 	LIST_HEAD(submit_list);
+	bool dirty_inode = false;
 
 	WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
 
@@ -1346,8 +1347,13 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		error = wpc->ops->map_blocks(wpc, inode, file_offset);
 		if (error)
 			break;
-		if (WARN_ON_ONCE(wpc->iomap.type == IOMAP_INLINE))
-			continue;
+		if (wpc->iomap.type == IOMAP_INLINE) {
+			error = iomap_write_inline_data(inode, page,
+					&wpc->iomap);
+			if (!error)
+				dirty_inode = true;
+			break;
+		}
 		if (wpc->iomap.type == IOMAP_HOLE)
 			continue;
 		iomap_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
@@ -1405,6 +1411,9 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	 */
 	if (!count)
 		end_page_writeback(page);
+
+	if (dirty_inode)
+		mark_inode_dirty(inode);
 done:
 	mapping_set_error(page->mapping, error);
 	return error;
-- 
2.26.3

