linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] iomap: support tail packing inline read
@ 2021-07-19 14:47 Gao Xiang
  2021-07-19 15:02 ` Matthew Wilcox
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Gao Xiang @ 2021-07-19 14:47 UTC (permalink / raw)
  To: linux-erofs, linux-fsdevel
  Cc: Darrick J . Wong, Andreas Gruenbacher, LKML, Matthew Wilcox,
	Christoph Hellwig

This tries to add tail packing inline read to iomap, which can support
several inline tail blocks. Similar to the previous approach, it cleans
post-EOF in one iteration.

The write path remains untouched since EROFS cannot be used for testing.
It'd be better to be implemented if upcoming real users care rather than
leave untested dead code around.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
v2: https://lore.kernel.org/r/YPLdSja%2F4FBsjss%2F@B-P7TQMD6M-0146.local/
changes since v2:
 - update suggestion from Christoph:
    https://lore.kernel.org/r/YPVe41YqpfGLNsBS@infradead.org/

Hi Andreas,
would you mind test on the gfs2 side? Thanks in advance!

Thanks,
Gao Xiang

 fs/iomap/buffered-io.c | 50 ++++++++++++++++++++++++++----------------
 fs/iomap/direct-io.c   | 11 ++++++----
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 87ccb3438bec..cac8a88660d8 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -207,23 +207,22 @@ struct iomap_readpage_ctx {
 
 static void
 iomap_read_inline_data(struct inode *inode, struct page *page,
-		struct iomap *iomap)
+		struct iomap *iomap, loff_t pos)
 {
-	size_t size = i_size_read(inode);
+	unsigned int size, poff = offset_in_page(pos);
 	void *addr;
 
-	if (PageUptodate(page))
-		return;
-
-	BUG_ON(page_has_private(page));
-	BUG_ON(page->index);
-	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/* inline source data must be inside a single page */
+	BUG_ON(iomap->length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/* handle tail-packing blocks cross the current page into the next */
+	size = min_t(unsigned int, iomap->length + pos - iomap->offset,
+		     PAGE_SIZE - poff);
 
 	addr = kmap_atomic(page);
-	memcpy(addr, iomap->inline_data, size);
-	memset(addr + size, 0, PAGE_SIZE - size);
+	memcpy(addr + poff, iomap->inline_data - iomap->offset + pos, size);
+	memset(addr + poff + size, 0, PAGE_SIZE - poff - size);
 	kunmap_atomic(addr);
-	SetPageUptodate(page);
+	iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
 }
 
 static inline bool iomap_block_needs_zeroing(struct inode *inode,
@@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	unsigned poff, plen;
 	sector_t sector;
 
-	if (iomap->type == IOMAP_INLINE) {
-		WARN_ON_ONCE(pos);
-		iomap_read_inline_data(inode, page, iomap);
-		return PAGE_SIZE;
-	}
-
-	/* zero post-eof blocks as the page may be mapped */
 	iop = iomap_page_create(inode, page);
+	/* needs to skip some leading uptodated blocks */
 	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
 	if (plen == 0)
 		goto done;
 
+	if (iomap->type == IOMAP_INLINE) {
+		iomap_read_inline_data(inode, page, iomap, pos);
+		plen = PAGE_SIZE - poff;
+		goto done;
+	}
+
+	/* zero post-eof blocks as the page may be mapped */
 	if (iomap_block_needs_zeroing(inode, iomap, pos)) {
 		zero_user(page, poff, plen);
 		iomap_set_range_uptodate(page, poff, plen);
@@ -589,6 +589,18 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 	return 0;
 }
 
+static int iomap_write_begin_inline(struct inode *inode, loff_t pos,
+		struct page *page, struct iomap *srcmap)
+{
+	/* needs more work for the tailpacking case, disable for now */
+	if (WARN_ON_ONCE(pos != 0))
+		return -EIO;
+	if (PageUptodate(page))
+		return 0;
+	iomap_read_inline_data(inode, page, srcmap, pos);
+	return 0;
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
@@ -618,7 +630,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	}
 
 	if (srcmap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, srcmap);
+		status = iomap_write_begin_inline(inode, pos, page, srcmap);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9398b8c31323..ee6309967b77 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -379,22 +379,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 {
 	struct iov_iter *iter = dio->submit.iter;
 	size_t copied;
+	void *dst = iomap->inline_data + pos - iomap->offset;
 
-	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/* inline data must be inside a single page */
+	BUG_ON(length > PAGE_SIZE - offset_in_page(iomap->inline_data));
 
 	if (dio->flags & IOMAP_DIO_WRITE) {
 		loff_t size = inode->i_size;
 
 		if (pos > size)
-			memset(iomap->inline_data + size, 0, pos - size);
-		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+			memset(iomap->inline_data + size - iomap->offset,
+			       0, pos - size);
+		copied = copy_from_iter(dst, length, iter);
 		if (copied) {
 			if (pos + copied > size)
 				i_size_write(inode, pos + copied);
 			mark_inode_dirty(inode);
 		}
 	} else {
-		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+		copied = copy_to_iter(dst, length, iter);
 	}
 	dio->size += copied;
 	return copied;
-- 
2.24.4


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 14:47 [PATCH v3] iomap: support tail packing inline read Gao Xiang
@ 2021-07-19 15:02 ` Matthew Wilcox
  2021-07-19 15:13   ` Christoph Hellwig
  2021-07-19 15:29   ` Gao Xiang
  2021-07-20 11:23 ` Andreas Grünbacher
  2021-07-20 11:34 ` Andreas Grünbacher
  2 siblings, 2 replies; 13+ messages in thread
From: Matthew Wilcox @ 2021-07-19 15:02 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Darrick J . Wong, Andreas Gruenbacher, LKML, linux-fsdevel,
	linux-erofs, Christoph Hellwig

On Mon, Jul 19, 2021 at 10:47:47PM +0800, Gao Xiang wrote:
> @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	unsigned poff, plen;
>  	sector_t sector;
>  
> -	if (iomap->type == IOMAP_INLINE) {
> -		WARN_ON_ONCE(pos);
> -		iomap_read_inline_data(inode, page, iomap);
> -		return PAGE_SIZE;
> -	}
> -
> -	/* zero post-eof blocks as the page may be mapped */
>  	iop = iomap_page_create(inode, page);
> +	/* needs to skip some leading uptodated blocks */
>  	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>  	if (plen == 0)
>  		goto done;
>  
> +	if (iomap->type == IOMAP_INLINE) {
> +		iomap_read_inline_data(inode, page, iomap, pos);
> +		plen = PAGE_SIZE - poff;
> +		goto done;
> +	}

This is going to break Andreas' case that he just patched to work.
GFS2 needs for there to _not_ be an iop for inline data.  That's
why I said we need to sort out when to create an iop before moving
the IOMAP_INLINE case below the creation of the iop.

If we're not going to do that first, then I recommend leaving the
IOMAP_INLINE case where it is and changing it to ...

	if (iomap->type == IOMAP_INLINE)
		return iomap_read_inline_data(inode, page, iomap, pos);

... and have iomap_read_inline_data() return the number of bytes that
it copied + zeroed (ie PAGE_SIZE - poff).


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 15:02 ` Matthew Wilcox
@ 2021-07-19 15:13   ` Christoph Hellwig
  2021-07-19 16:11     ` Gao Xiang
  2021-07-19 15:29   ` Gao Xiang
  1 sibling, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2021-07-19 15:13 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Darrick J . Wong, Andreas Gruenbacher, LKML, linux-fsdevel,
	linux-erofs, Christoph Hellwig

On Mon, Jul 19, 2021 at 04:02:30PM +0100, Matthew Wilcox wrote:
> > +	if (iomap->type == IOMAP_INLINE) {
> > +		iomap_read_inline_data(inode, page, iomap, pos);
> > +		plen = PAGE_SIZE - poff;
> > +		goto done;
> > +	}
> 
> This is going to break Andreas' case that he just patched to work.
> GFS2 needs for there to _not_ be an iop for inline data.  That's
> why I said we need to sort out when to create an iop before moving
> the IOMAP_INLINE case below the creation of the iop.
> 
> If we're not going to do that first, then I recommend leaving the
> IOMAP_INLINE case where it is and changing it to ...
> 
> 	if (iomap->type == IOMAP_INLINE)
> 		return iomap_read_inline_data(inode, page, iomap, pos);
> 
> ... and have iomap_read_inline_data() return the number of bytes that
> it copied + zeroed (ie PAGE_SIZE - poff).

Returning the bytes is much cleaner anyway.  But we still need to deal
with the sub-page uptodate status in one form or another.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 15:02 ` Matthew Wilcox
  2021-07-19 15:13   ` Christoph Hellwig
@ 2021-07-19 15:29   ` Gao Xiang
  2021-07-19 15:31     ` Andreas Grünbacher
  1 sibling, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-07-19 15:29 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Darrick J . Wong, Andreas Gruenbacher, LKML, linux-fsdevel,
	linux-erofs, Christoph Hellwig

On Mon, Jul 19, 2021 at 04:02:30PM +0100, Matthew Wilcox wrote:
> On Mon, Jul 19, 2021 at 10:47:47PM +0800, Gao Xiang wrote:
> > @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >  	unsigned poff, plen;
> >  	sector_t sector;
> >  
> > -	if (iomap->type == IOMAP_INLINE) {
> > -		WARN_ON_ONCE(pos);
> > -		iomap_read_inline_data(inode, page, iomap);
> > -		return PAGE_SIZE;
> > -	}
> > -
> > -	/* zero post-eof blocks as the page may be mapped */
> >  	iop = iomap_page_create(inode, page);
> > +	/* needs to skip some leading uptodated blocks */
> >  	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
> >  	if (plen == 0)
> >  		goto done;
> >  
> > +	if (iomap->type == IOMAP_INLINE) {
> > +		iomap_read_inline_data(inode, page, iomap, pos);
> > +		plen = PAGE_SIZE - poff;
> > +		goto done;
> > +	}
> 
> This is going to break Andreas' case that he just patched to work.
> GFS2 needs for there to _not_ be an iop for inline data.  That's
> why I said we need to sort out when to create an iop before moving
> the IOMAP_INLINE case below the creation of the iop.

I have no idea how it breaks Andreas' case from the previous commit
message: "
iomap: Don't create iomap_page objects for inline files
In iomap_readpage_actor, don't create iop objects for inline inodes.
Otherwise, iomap_read_inline_data will set PageUptodate without setting
iop->uptodate, and iomap_page_release will eventually complain.

To prevent this kind of bug from occurring in the future, make sure the
page doesn't have private data attached in iomap_read_inline_data.
"

After this patch, iomap_read_inline_data() will set iop->uptodate with
iomap_set_range_uptodate() rather than set PageUptodate() directly, 
so iomap_page_release won't complain.

Am I missing something?

Thanks,
Gao Xiang

> 
> If we're not going to do that first, then I recommend leaving the
> IOMAP_INLINE case where it is and changing it to ...
> 
> 	if (iomap->type == IOMAP_INLINE)
> 		return iomap_read_inline_data(inode, page, iomap, pos);
> 
> ... and have iomap_read_inline_data() return the number of bytes that
> it copied + zeroed (ie PAGE_SIZE - poff).

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 15:29   ` Gao Xiang
@ 2021-07-19 15:31     ` Andreas Grünbacher
  2021-07-19 15:36       ` Gao Xiang
  0 siblings, 1 reply; 13+ messages in thread
From: Andreas Grünbacher @ 2021-07-19 15:31 UTC (permalink / raw)
  To: Matthew Wilcox, linux-erofs, Linux FS-devel Mailing List, LKML,
	Christoph Hellwig, Darrick J . Wong, Andreas Gruenbacher

Am Mo., 19. Juli 2021 um 17:29 Uhr schrieb Gao Xiang
<hsiangkao@linux.alibaba.com>:
> On Mon, Jul 19, 2021 at 04:02:30PM +0100, Matthew Wilcox wrote:
> > On Mon, Jul 19, 2021 at 10:47:47PM +0800, Gao Xiang wrote:
> > > @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> > >     unsigned poff, plen;
> > >     sector_t sector;
> > >
> > > -   if (iomap->type == IOMAP_INLINE) {
> > > -           WARN_ON_ONCE(pos);
> > > -           iomap_read_inline_data(inode, page, iomap);
> > > -           return PAGE_SIZE;
> > > -   }
> > > -
> > > -   /* zero post-eof blocks as the page may be mapped */
> > >     iop = iomap_page_create(inode, page);
> > > +   /* needs to skip some leading uptodated blocks */
> > >     iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
> > >     if (plen == 0)
> > >             goto done;
> > >
> > > +   if (iomap->type == IOMAP_INLINE) {
> > > +           iomap_read_inline_data(inode, page, iomap, pos);
> > > +           plen = PAGE_SIZE - poff;
> > > +           goto done;
> > > +   }
> >
> > This is going to break Andreas' case that he just patched to work.
> > GFS2 needs for there to _not_ be an iop for inline data.  That's
> > why I said we need to sort out when to create an iop before moving
> > the IOMAP_INLINE case below the creation of the iop.
>
> I have no idea how it breaks Andreas' case from the previous commit
> message: "
> iomap: Don't create iomap_page objects for inline files
> In iomap_readpage_actor, don't create iop objects for inline inodes.
> Otherwise, iomap_read_inline_data will set PageUptodate without setting
> iop->uptodate, and iomap_page_release will eventually complain.
>
> To prevent this kind of bug from occurring in the future, make sure the
> page doesn't have private data attached in iomap_read_inline_data.
> "
>
> After this patch, iomap_read_inline_data() will set iop->uptodate with
> iomap_set_range_uptodate() rather than set PageUptodate() directly,
> so iomap_page_release won't complain.

Yes, that actually looks fine.

Thanks,
Andreas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 15:31     ` Andreas Grünbacher
@ 2021-07-19 15:36       ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-07-19 15:36 UTC (permalink / raw)
  To: Andreas Grünbacher
  Cc: Darrick J . Wong, LKML, Matthew Wilcox,
	Linux FS-devel Mailing List, linux-erofs, Christoph Hellwig

On Mon, Jul 19, 2021 at 05:31:51PM +0200, Andreas Grünbacher wrote:
> Am Mo., 19. Juli 2021 um 17:29 Uhr schrieb Gao Xiang
> <hsiangkao@linux.alibaba.com>:
> > On Mon, Jul 19, 2021 at 04:02:30PM +0100, Matthew Wilcox wrote:
> > > On Mon, Jul 19, 2021 at 10:47:47PM +0800, Gao Xiang wrote:
> > > > @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> > > >     unsigned poff, plen;
> > > >     sector_t sector;
> > > >
> > > > -   if (iomap->type == IOMAP_INLINE) {
> > > > -           WARN_ON_ONCE(pos);
> > > > -           iomap_read_inline_data(inode, page, iomap);
> > > > -           return PAGE_SIZE;
> > > > -   }
> > > > -
> > > > -   /* zero post-eof blocks as the page may be mapped */
> > > >     iop = iomap_page_create(inode, page);
> > > > +   /* needs to skip some leading uptodated blocks */
> > > >     iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
> > > >     if (plen == 0)
> > > >             goto done;
> > > >
> > > > +   if (iomap->type == IOMAP_INLINE) {
> > > > +           iomap_read_inline_data(inode, page, iomap, pos);
> > > > +           plen = PAGE_SIZE - poff;
> > > > +           goto done;
> > > > +   }
> > >
> > > This is going to break Andreas' case that he just patched to work.
> > > GFS2 needs for there to _not_ be an iop for inline data.  That's
> > > why I said we need to sort out when to create an iop before moving
> > > the IOMAP_INLINE case below the creation of the iop.
> >
> > I have no idea how it breaks Andreas' case from the previous commit
> > message: "
> > iomap: Don't create iomap_page objects for inline files
> > In iomap_readpage_actor, don't create iop objects for inline inodes.
> > Otherwise, iomap_read_inline_data will set PageUptodate without setting
> > iop->uptodate, and iomap_page_release will eventually complain.
> >
> > To prevent this kind of bug from occurring in the future, make sure the
> > page doesn't have private data attached in iomap_read_inline_data.
> > "
> >
> > After this patch, iomap_read_inline_data() will set iop->uptodate with
> > iomap_set_range_uptodate() rather than set PageUptodate() directly,
> > so iomap_page_release won't complain.
> 
> Yes, that actually looks fine.

Yeah, although I admit it looks (maybe) somewhat sub-optimal, but I think
let's make it work correctly first. Then consider how to optimize it even
further (like drop iops or likewise...).

Just my humble suggestion of this from heart....

Thanks,
Gao Xiang

> 
> Thanks,
> Andreas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 15:13   ` Christoph Hellwig
@ 2021-07-19 16:11     ` Gao Xiang
  2021-07-19 16:31       ` Matthew Wilcox
  0 siblings, 1 reply; 13+ messages in thread
From: Gao Xiang @ 2021-07-19 16:11 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Darrick J . Wong, Andreas Gruenbacher, LKML, Matthew Wilcox,
	linux-fsdevel, linux-erofs

On Mon, Jul 19, 2021 at 05:13:10PM +0200, Christoph Hellwig wrote:
> On Mon, Jul 19, 2021 at 04:02:30PM +0100, Matthew Wilcox wrote:
> > > +	if (iomap->type == IOMAP_INLINE) {
> > > +		iomap_read_inline_data(inode, page, iomap, pos);
> > > +		plen = PAGE_SIZE - poff;
> > > +		goto done;
> > > +	}
> > 
> > This is going to break Andreas' case that he just patched to work.
> > GFS2 needs for there to _not_ be an iop for inline data.  That's
> > why I said we need to sort out when to create an iop before moving
> > the IOMAP_INLINE case below the creation of the iop.
> > 
> > If we're not going to do that first, then I recommend leaving the
> > IOMAP_INLINE case where it is and changing it to ...
> > 
> > 	if (iomap->type == IOMAP_INLINE)
> > 		return iomap_read_inline_data(inode, page, iomap, pos);
> > 
> > ... and have iomap_read_inline_data() return the number of bytes that
> > it copied + zeroed (ie PAGE_SIZE - poff).
> 
> Returning the bytes is much cleaner anyway.  But we still need to deal
> with the sub-page uptodate status in one form or another.

There is another iomap_read_inline_data() caller as in:
+static int iomap_write_begin_inline(struct inode *inode, loff_t pos,
+		struct page *page, struct iomap *srcmap)
+{
+	/* needs more work for the tailpacking case, disable for now */
+	if (WARN_ON_ONCE(pos != 0))
+		return -EIO;
+	if (PageUptodate(page))
+		return 0;
+	iomap_read_inline_data(inode, page, srcmap, pos);
+	return 0;
+}

I'd like to avoid it as (void)iomap_read_inline_data(...). That's why it
left as void return type.

Anyway, let's check if it works correctly first. I think it's a high
priority stuff, and EROFS uncompressed cases can be entirely cleaned
up with iomap then.

Thanks,
Gao Xiang

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 16:11     ` Gao Xiang
@ 2021-07-19 16:31       ` Matthew Wilcox
  2021-07-19 16:45         ` Gao Xiang
  0 siblings, 1 reply; 13+ messages in thread
From: Matthew Wilcox @ 2021-07-19 16:31 UTC (permalink / raw)
  To: Christoph Hellwig, linux-erofs, linux-fsdevel, LKML,
	Darrick J . Wong, Andreas Gruenbacher

On Tue, Jul 20, 2021 at 12:11:49AM +0800, Gao Xiang wrote:
> On Mon, Jul 19, 2021 at 05:13:10PM +0200, Christoph Hellwig wrote:
> > On Mon, Jul 19, 2021 at 04:02:30PM +0100, Matthew Wilcox wrote:
> > > > +	if (iomap->type == IOMAP_INLINE) {
> > > > +		iomap_read_inline_data(inode, page, iomap, pos);
> > > > +		plen = PAGE_SIZE - poff;
> > > > +		goto done;
> > > > +	}
> > > 
> > > This is going to break Andreas' case that he just patched to work.
> > > GFS2 needs for there to _not_ be an iop for inline data.  That's
> > > why I said we need to sort out when to create an iop before moving
> > > the IOMAP_INLINE case below the creation of the iop.
> > > 
> > > If we're not going to do that first, then I recommend leaving the
> > > IOMAP_INLINE case where it is and changing it to ...
> > > 
> > > 	if (iomap->type == IOMAP_INLINE)
> > > 		return iomap_read_inline_data(inode, page, iomap, pos);
> > > 
> > > ... and have iomap_read_inline_data() return the number of bytes that
> > > it copied + zeroed (ie PAGE_SIZE - poff).
> > 
> > Returning the bytes is much cleaner anyway.  But we still need to deal
> > with the sub-page uptodate status in one form or another.
> 
> There is another iomap_read_inline_data() caller as in:
> +static int iomap_write_begin_inline(struct inode *inode, loff_t pos,
> +		struct page *page, struct iomap *srcmap)
> +{
> +	/* needs more work for the tailpacking case, disable for now */
> +	if (WARN_ON_ONCE(pos != 0))
> +		return -EIO;
> +	if (PageUptodate(page))
> +		return 0;
> +	iomap_read_inline_data(inode, page, srcmap, pos);
> +	return 0;
> +}
> 
> I'd like to avoid it as (void)iomap_read_inline_data(...). That's why it
> left as void return type.

You don't need to cast away the return value in C.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 16:31       ` Matthew Wilcox
@ 2021-07-19 16:45         ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-07-19 16:45 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Darrick J . Wong, Andreas Gruenbacher, LKML, linux-fsdevel,
	linux-erofs, Christoph Hellwig

On Mon, Jul 19, 2021 at 05:31:10PM +0100, Matthew Wilcox wrote:
> On Tue, Jul 20, 2021 at 12:11:49AM +0800, Gao Xiang wrote:
> > On Mon, Jul 19, 2021 at 05:13:10PM +0200, Christoph Hellwig wrote:
> > > On Mon, Jul 19, 2021 at 04:02:30PM +0100, Matthew Wilcox wrote:
> > > > > +	if (iomap->type == IOMAP_INLINE) {
> > > > > +		iomap_read_inline_data(inode, page, iomap, pos);
> > > > > +		plen = PAGE_SIZE - poff;
> > > > > +		goto done;
> > > > > +	}
> > > > 
> > > > This is going to break Andreas' case that he just patched to work.
> > > > GFS2 needs for there to _not_ be an iop for inline data.  That's
> > > > why I said we need to sort out when to create an iop before moving
> > > > the IOMAP_INLINE case below the creation of the iop.
> > > > 
> > > > If we're not going to do that first, then I recommend leaving the
> > > > IOMAP_INLINE case where it is and changing it to ...
> > > > 
> > > > 	if (iomap->type == IOMAP_INLINE)
> > > > 		return iomap_read_inline_data(inode, page, iomap, pos);
> > > > 
> > > > ... and have iomap_read_inline_data() return the number of bytes that
> > > > it copied + zeroed (ie PAGE_SIZE - poff).
> > > 
> > > Returning the bytes is much cleaner anyway.  But we still need to deal
> > > with the sub-page uptodate status in one form or another.
> > 
> > There is another iomap_read_inline_data() caller as in:
> > +static int iomap_write_begin_inline(struct inode *inode, loff_t pos,
> > +		struct page *page, struct iomap *srcmap)
> > +{
> > +	/* needs more work for the tailpacking case, disable for now */
> > +	if (WARN_ON_ONCE(pos != 0))
> > +		return -EIO;
> > +	if (PageUptodate(page))
> > +		return 0;
> > +	iomap_read_inline_data(inode, page, srcmap, pos);
> > +	return 0;
> > +}
> > 
> > I'd like to avoid it as (void)iomap_read_inline_data(...). That's why it
> > left as void return type.
> 
> You don't need to cast away the return value in C.

Well, I don't check the current behavior of this, but see:
http://c-faq.com/style/voidcasts.html

Anyway, that's minor and easy to update if really needed.
I'd like to check if it works first...

Thanks,
Gao Xiang


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 14:47 [PATCH v3] iomap: support tail packing inline read Gao Xiang
  2021-07-19 15:02 ` Matthew Wilcox
@ 2021-07-20 11:23 ` Andreas Grünbacher
  2021-07-20 12:10   ` Gao Xiang
  2021-07-20 11:34 ` Andreas Grünbacher
  2 siblings, 1 reply; 13+ messages in thread
From: Andreas Grünbacher @ 2021-07-20 11:23 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Darrick J . Wong, LKML, Matthew Wilcox,
	Linux FS-devel Mailing List, linux-erofs, Christoph Hellwig

Am Mo., 19. Juli 2021 um 16:48 Uhr schrieb Gao Xiang
<hsiangkao@linux.alibaba.com>:
> This tries to add tail packing inline read to iomap, which can support
> several inline tail blocks. Similar to the previous approach, it cleans
> post-EOF in one iteration.
>
> The write path remains untouched since EROFS cannot be used for testing.
> It'd be better to be implemented if upcoming real users care rather than
> leave untested dead code around.
>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Darrick J. Wong <djwong@kernel.org>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
> v2: https://lore.kernel.org/r/YPLdSja%2F4FBsjss%2F@B-P7TQMD6M-0146.local/
> changes since v2:
>  - update suggestion from Christoph:
>     https://lore.kernel.org/r/YPVe41YqpfGLNsBS@infradead.org/
>
> Hi Andreas,
> would you mind test on the gfs2 side? Thanks in advance!
>
> Thanks,
> Gao Xiang
>
>  fs/iomap/buffered-io.c | 50 ++++++++++++++++++++++++++----------------
>  fs/iomap/direct-io.c   | 11 ++++++----
>  2 files changed, 38 insertions(+), 23 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 87ccb3438bec..cac8a88660d8 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -207,23 +207,22 @@ struct iomap_readpage_ctx {
>
>  static void
>  iomap_read_inline_data(struct inode *inode, struct page *page,
> -               struct iomap *iomap)
> +               struct iomap *iomap, loff_t pos)
>  {
> -       size_t size = i_size_read(inode);
> +       unsigned int size, poff = offset_in_page(pos);
>         void *addr;
>
> -       if (PageUptodate(page))
> -               return;
> -
> -       BUG_ON(page_has_private(page));
> -       BUG_ON(page->index);
> -       BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +       /* inline source data must be inside a single page */
> +       BUG_ON(iomap->length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +       /* handle tail-packing blocks cross the current page into the next */
> +       size = min_t(unsigned int, iomap->length + pos - iomap->offset,
> +                    PAGE_SIZE - poff);

Hmm, so EROFS really does multi-page tail packing? This contradicts
the comment and code in iomap_dio_inline_actor.

>         addr = kmap_atomic(page);
> -       memcpy(addr, iomap->inline_data, size);
> -       memset(addr + size, 0, PAGE_SIZE - size);
> +       memcpy(addr + poff, iomap->inline_data - iomap->offset + pos, size);
> +       memset(addr + poff + size, 0, PAGE_SIZE - poff - size);
>         kunmap_atomic(addr);
> -       SetPageUptodate(page);
> +       iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
>  }

It's been said before, but iomap_read_inline_data should return
PAGE_SIZE - poff, and no (void) casting when the return value is
ignored.

>  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>         unsigned poff, plen;
>         sector_t sector;
>
> -       if (iomap->type == IOMAP_INLINE) {
> -               WARN_ON_ONCE(pos);
> -               iomap_read_inline_data(inode, page, iomap);
> -               return PAGE_SIZE;
> -       }
> -
> -       /* zero post-eof blocks as the page may be mapped */
>         iop = iomap_page_create(inode, page);
> +       /* needs to skip some leading uptodated blocks */

"needs to skip some leading uptodate blocks"

>         iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>         if (plen == 0)
>                 goto done;
>
> +       if (iomap->type == IOMAP_INLINE) {
> +               iomap_read_inline_data(inode, page, iomap, pos);
> +               plen = PAGE_SIZE - poff;
> +               goto done;
> +       }
> +
> +       /* zero post-eof blocks as the page may be mapped */
>         if (iomap_block_needs_zeroing(inode, iomap, pos)) {
>                 zero_user(page, poff, plen);
>                 iomap_set_range_uptodate(page, poff, plen);
> @@ -589,6 +589,18 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>         return 0;
>  }
>
> +static int iomap_write_begin_inline(struct inode *inode, loff_t pos,
> +               struct page *page, struct iomap *srcmap)
> +{
> +       /* needs more work for the tailpacking case, disable for now */
> +       if (WARN_ON_ONCE(pos != 0))

This should be a WARN_ON_ONCE(srcmap->offset != 0). Otherwise, something like:

  xfs_io -ft -c 'pwrite 1 2'

will fail because pos will be 1.

> +               return -EIO;
> +       if (PageUptodate(page))
> +               return 0;
> +       iomap_read_inline_data(inode, page, srcmap, pos);

The above means that passing pos to iomap_read_inline_data here won't
do the right thing, either.

> +       return 0;
> +}
> +
>  static int
>  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>                 struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> @@ -618,7 +630,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>         }
>
>         if (srcmap->type == IOMAP_INLINE)
> -               iomap_read_inline_data(inode, page, srcmap);
> +               status = iomap_write_begin_inline(inode, pos, page, srcmap);
>         else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
>                 status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>         else
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 9398b8c31323..ee6309967b77 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -379,22 +379,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  {
>         struct iov_iter *iter = dio->submit.iter;
>         size_t copied;
> +       void *dst = iomap->inline_data + pos - iomap->offset;
>
> -       BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +       /* inline data must be inside a single page */
> +       BUG_ON(length > PAGE_SIZE - offset_in_page(iomap->inline_data));
>
>         if (dio->flags & IOMAP_DIO_WRITE) {
>                 loff_t size = inode->i_size;
>
>                 if (pos > size)
> -                       memset(iomap->inline_data + size, 0, pos - size);
> -               copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> +                       memset(iomap->inline_data + size - iomap->offset,
> +                              0, pos - size);
> +               copied = copy_from_iter(dst, length, iter);
>                 if (copied) {
>                         if (pos + copied > size)
>                                 i_size_write(inode, pos + copied);
>                         mark_inode_dirty(inode);
>                 }
>         } else {
> -               copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> +               copied = copy_to_iter(dst, length, iter);
>         }
>         dio->size += copied;
>         return copied;
> --
> 2.24.4
>

Thanks,
Andreas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-19 14:47 [PATCH v3] iomap: support tail packing inline read Gao Xiang
  2021-07-19 15:02 ` Matthew Wilcox
  2021-07-20 11:23 ` Andreas Grünbacher
@ 2021-07-20 11:34 ` Andreas Grünbacher
  2021-07-20 12:23   ` Gao Xiang
  2 siblings, 1 reply; 13+ messages in thread
From: Andreas Grünbacher @ 2021-07-20 11:34 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Darrick J . Wong, LKML, Matthew Wilcox,
	Linux FS-devel Mailing List, linux-erofs, Christoph Hellwig

Am Mo., 19. Juli 2021 um 16:48 Uhr schrieb Gao Xiang
<hsiangkao@linux.alibaba.com>:
> This tries to add tail packing inline read to iomap, which can support
> several inline tail blocks. Similar to the previous approach, it cleans
> post-EOF in one iteration.
>
> The write path remains untouched since EROFS cannot be used for testing.
> It'd be better to be implemented if upcoming real users care rather than
> leave untested dead code around.
>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Darrick J. Wong <djwong@kernel.org>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
> v2: https://lore.kernel.org/r/YPLdSja%2F4FBsjss%2F@B-P7TQMD6M-0146.local/
> changes since v2:
>  - update suggestion from Christoph:
>     https://lore.kernel.org/r/YPVe41YqpfGLNsBS@infradead.org/
>
> Hi Andreas,
> would you mind test on the gfs2 side? Thanks in advance!
>
> Thanks,
> Gao Xiang
>
>  fs/iomap/buffered-io.c | 50 ++++++++++++++++++++++++++----------------
>  fs/iomap/direct-io.c   | 11 ++++++----
>  2 files changed, 38 insertions(+), 23 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 87ccb3438bec..cac8a88660d8 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -207,23 +207,22 @@ struct iomap_readpage_ctx {
>
>  static void
>  iomap_read_inline_data(struct inode *inode, struct page *page,
> -               struct iomap *iomap)
> +               struct iomap *iomap, loff_t pos)
>  {
> -       size_t size = i_size_read(inode);
> +       unsigned int size, poff = offset_in_page(pos);
>         void *addr;
>
> -       if (PageUptodate(page))
> -               return;
> -
> -       BUG_ON(page_has_private(page));
> -       BUG_ON(page->index);
> -       BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +       /* inline source data must be inside a single page */
> +       BUG_ON(iomap->length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +       /* handle tail-packing blocks cross the current page into the next */
> +       size = min_t(unsigned int, iomap->length + pos - iomap->offset,
> +                    PAGE_SIZE - poff);
>
>         addr = kmap_atomic(page);
> -       memcpy(addr, iomap->inline_data, size);
> -       memset(addr + size, 0, PAGE_SIZE - size);
> +       memcpy(addr + poff, iomap->inline_data - iomap->offset + pos, size);
> +       memset(addr + poff + size, 0, PAGE_SIZE - poff - size);
>         kunmap_atomic(addr);
> -       SetPageUptodate(page);
> +       iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
>  }
>
>  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>         unsigned poff, plen;
>         sector_t sector;
>
> -       if (iomap->type == IOMAP_INLINE) {
> -               WARN_ON_ONCE(pos);
> -               iomap_read_inline_data(inode, page, iomap);
> -               return PAGE_SIZE;
> -       }
> -
> -       /* zero post-eof blocks as the page may be mapped */
>         iop = iomap_page_create(inode, page);

We can skip creating the iop when reading the entire page.

> +       /* needs to skip some leading uptodated blocks */
>         iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>         if (plen == 0)
>                 goto done;
>
> +       if (iomap->type == IOMAP_INLINE) {
> +               iomap_read_inline_data(inode, page, iomap, pos);
> +               plen = PAGE_SIZE - poff;
> +               goto done;
> +       }
> +
> +       /* zero post-eof blocks as the page may be mapped */
>         if (iomap_block_needs_zeroing(inode, iomap, pos)) {
>                 zero_user(page, poff, plen);
>                 iomap_set_range_uptodate(page, poff, plen);
> @@ -589,6 +589,18 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>         return 0;
>  }
>
> +static int iomap_write_begin_inline(struct inode *inode, loff_t pos,
> +               struct page *page, struct iomap *srcmap)
> +{
> +       /* needs more work for the tailpacking case, disable for now */
> +       if (WARN_ON_ONCE(pos != 0))
> +               return -EIO;
> +       if (PageUptodate(page))
> +               return 0;
> +       iomap_read_inline_data(inode, page, srcmap, pos);
> +       return 0;
> +}
> +
>  static int
>  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>                 struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> @@ -618,7 +630,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>         }
>
>         if (srcmap->type == IOMAP_INLINE)
> -               iomap_read_inline_data(inode, page, srcmap);
> +               status = iomap_write_begin_inline(inode, pos, page, srcmap);
>         else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
>                 status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>         else
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 9398b8c31323..ee6309967b77 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -379,22 +379,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  {
>         struct iov_iter *iter = dio->submit.iter;
>         size_t copied;
> +       void *dst = iomap->inline_data + pos - iomap->offset;
>
> -       BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +       /* inline data must be inside a single page */
> +       BUG_ON(length > PAGE_SIZE - offset_in_page(iomap->inline_data));
>
>         if (dio->flags & IOMAP_DIO_WRITE) {
>                 loff_t size = inode->i_size;
>
>                 if (pos > size)
> -                       memset(iomap->inline_data + size, 0, pos - size);
> -               copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> +                       memset(iomap->inline_data + size - iomap->offset,
> +                              0, pos - size);
> +               copied = copy_from_iter(dst, length, iter);
>                 if (copied) {
>                         if (pos + copied > size)
>                                 i_size_write(inode, pos + copied);
>                         mark_inode_dirty(inode);
>                 }
>         } else {
> -               copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> +               copied = copy_to_iter(dst, length, iter);
>         }
>         dio->size += copied;
>         return copied;
> --
> 2.24.4
>

Thanks,
Andreas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-20 11:23 ` Andreas Grünbacher
@ 2021-07-20 12:10   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-07-20 12:10 UTC (permalink / raw)
  To: Andreas Grünbacher
  Cc: Darrick J . Wong, LKML, Matthew Wilcox,
	Linux FS-devel Mailing List, linux-erofs, Christoph Hellwig

Hi Andreas,

On Tue, Jul 20, 2021 at 01:23:41PM +0200, Andreas Grünbacher wrote:
> Am Mo., 19. Juli 2021 um 16:48 Uhr schrieb Gao Xiang
> <hsiangkao@linux.alibaba.com>:
> > This tries to add tail packing inline read to iomap, which can support
> > several inline tail blocks. Similar to the previous approach, it cleans
> > post-EOF in one iteration.
> >
> > The write path remains untouched since EROFS cannot be used for testing.
> > It'd be better to be implemented if upcoming real users care rather than
> > leave untested dead code around.
> >
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Darrick J. Wong <djwong@kernel.org>
> > Cc: Matthew Wilcox <willy@infradead.org>
> > Cc: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
> > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > ---
> > v2: https://lore.kernel.org/r/YPLdSja%2F4FBsjss%2F@B-P7TQMD6M-0146.local/
> > changes since v2:
> >  - update suggestion from Christoph:
> >     https://lore.kernel.org/r/YPVe41YqpfGLNsBS@infradead.org/
> >
> > Hi Andreas,
> > would you mind test on the gfs2 side? Thanks in advance!
> >
> > Thanks,
> > Gao Xiang
> >
> >  fs/iomap/buffered-io.c | 50 ++++++++++++++++++++++++++----------------
> >  fs/iomap/direct-io.c   | 11 ++++++----
> >  2 files changed, 38 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 87ccb3438bec..cac8a88660d8 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -207,23 +207,22 @@ struct iomap_readpage_ctx {
> >
> >  static void
> >  iomap_read_inline_data(struct inode *inode, struct page *page,
> > -               struct iomap *iomap)
> > +               struct iomap *iomap, loff_t pos)
> >  {
> > -       size_t size = i_size_read(inode);
> > +       unsigned int size, poff = offset_in_page(pos);
> >         void *addr;
> >
> > -       if (PageUptodate(page))
> > -               return;
> > -
> > -       BUG_ON(page_has_private(page));
> > -       BUG_ON(page->index);
> > -       BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +       /* inline source data must be inside a single page */
> > +       BUG_ON(iomap->length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +       /* handle tail-packing blocks cross the current page into the next */
> > +       size = min_t(unsigned int, iomap->length + pos - iomap->offset,
> > +                    PAGE_SIZE - poff);
> 
> Hmm, so EROFS really does multi-page tail packing? This contradicts
> the comment and code in iomap_dio_inline_actor.

No, it doesn't really contradict anything. There are 2 different concepts,
the one is the metapage of iomap->inline_data itself. It should be in the
same page, so the inode itself and inline data can be in the same page
since currently assumed we don't support partial page read.
   ___________________________________________________
   | inode |              inline data                 |
   |<-------------- metadata page ------------------->|

 (here inline data can be multiple blocks.)
   
The other one is actual file tail blocks, I think it can cross pages due
to multiple tail inline blocks.
                            |<---------- inline data ------------->|
  _________________________________________________________________
  | file block | file block | file block | file block | file block |
  |<----------------    page   ---------------------->|<---  page

Although EROFS currently only support page-sized block, but I will look
into subpage-sized blocks after iomap work is done (due to PAGE_SIZE of
some platform is large, e.g. 64kb rather than 4kb.)

> 
> >         addr = kmap_atomic(page);
> > -       memcpy(addr, iomap->inline_data, size);
> > -       memset(addr + size, 0, PAGE_SIZE - size);
> > +       memcpy(addr + poff, iomap->inline_data - iomap->offset + pos, size);
> > +       memset(addr + poff + size, 0, PAGE_SIZE - poff - size);
> >         kunmap_atomic(addr);
> > -       SetPageUptodate(page);
> > +       iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
> >  }
> 
> It's been said before, but iomap_read_inline_data should return
> PAGE_SIZE - poff, and no (void) casting when the return value is
> ignored.

Ok, anyway, I could update it in the next version.

> 
> >  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> > @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >         unsigned poff, plen;
> >         sector_t sector;
> >
> > -       if (iomap->type == IOMAP_INLINE) {
> > -               WARN_ON_ONCE(pos);
> > -               iomap_read_inline_data(inode, page, iomap);
> > -               return PAGE_SIZE;
> > -       }
> > -
> > -       /* zero post-eof blocks as the page may be mapped */
> >         iop = iomap_page_create(inode, page);
> > +       /* needs to skip some leading uptodated blocks */
> 
> "needs to skip some leading uptodate blocks"

will update.

> 
> >         iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
> >         if (plen == 0)
> >                 goto done;
> >
> > +       if (iomap->type == IOMAP_INLINE) {
> > +               iomap_read_inline_data(inode, page, iomap, pos);
> > +               plen = PAGE_SIZE - poff;
> > +               goto done;
> > +       }
> > +
> > +       /* zero post-eof blocks as the page may be mapped */
> >         if (iomap_block_needs_zeroing(inode, iomap, pos)) {
> >                 zero_user(page, poff, plen);
> >                 iomap_set_range_uptodate(page, poff, plen);
> > @@ -589,6 +589,18 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> >         return 0;
> >  }
> >
> > +static int iomap_write_begin_inline(struct inode *inode, loff_t pos,
> > +               struct page *page, struct iomap *srcmap)
> > +{
> > +       /* needs more work for the tailpacking case, disable for now */
> > +       if (WARN_ON_ONCE(pos != 0))
> 
> This should be a WARN_ON_ONCE(srcmap->offset != 0). Otherwise, something like:
> 
>   xfs_io -ft -c 'pwrite 1 2'
> 
> will fail because pos will be 1.

Yeah, will update. Thanks for pointing out!

> 
> > +               return -EIO;
> > +       if (PageUptodate(page))
> > +               return 0;
> > +       iomap_read_inline_data(inode, page, srcmap, pos);
> 
> The above means that passing pos to iomap_read_inline_data here won't
> do the right thing, either.

yeah, I think it should use 0 here instead. since iomap->offset == 0

Thanks,
Gao Xiang

> 
> > +       return 0;
> > +}
> > +
> >  static int
> >  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> >                 struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> > @@ -618,7 +630,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> >         }
> >
> >         if (srcmap->type == IOMAP_INLINE)
> > -               iomap_read_inline_data(inode, page, srcmap);
> > +               status = iomap_write_begin_inline(inode, pos, page, srcmap);
> >         else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
> >                 status = __block_write_begin_int(page, pos, len, NULL, srcmap);
> >         else
> > diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> > index 9398b8c31323..ee6309967b77 100644
> > --- a/fs/iomap/direct-io.c
> > +++ b/fs/iomap/direct-io.c
> > @@ -379,22 +379,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
> >  {
> >         struct iov_iter *iter = dio->submit.iter;
> >         size_t copied;
> > +       void *dst = iomap->inline_data + pos - iomap->offset;
> >
> > -       BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +       /* inline data must be inside a single page */
> > +       BUG_ON(length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> >
> >         if (dio->flags & IOMAP_DIO_WRITE) {
> >                 loff_t size = inode->i_size;
> >
> >                 if (pos > size)
> > -                       memset(iomap->inline_data + size, 0, pos - size);
> > -               copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> > +                       memset(iomap->inline_data + size - iomap->offset,
> > +                              0, pos - size);
> > +               copied = copy_from_iter(dst, length, iter);
> >                 if (copied) {
> >                         if (pos + copied > size)
> >                                 i_size_write(inode, pos + copied);
> >                         mark_inode_dirty(inode);
> >                 }
> >         } else {
> > -               copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> > +               copied = copy_to_iter(dst, length, iter);
> >         }
> >         dio->size += copied;
> >         return copied;
> > --
> > 2.24.4
> >
> 
> Thanks,
> Andreas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] iomap: support tail packing inline read
  2021-07-20 11:34 ` Andreas Grünbacher
@ 2021-07-20 12:23   ` Gao Xiang
  0 siblings, 0 replies; 13+ messages in thread
From: Gao Xiang @ 2021-07-20 12:23 UTC (permalink / raw)
  To: Andreas Grünbacher
  Cc: Darrick J . Wong, LKML, Matthew Wilcox,
	Linux FS-devel Mailing List, linux-erofs, Christoph Hellwig

On Tue, Jul 20, 2021 at 01:34:58PM +0200, Andreas Grünbacher wrote:
> Am Mo., 19. Juli 2021 um 16:48 Uhr schrieb Gao Xiang
> <hsiangkao@linux.alibaba.com>:
> > This tries to add tail packing inline read to iomap, which can support
> > several inline tail blocks. Similar to the previous approach, it cleans
> > post-EOF in one iteration.
> >
> > The write path remains untouched since EROFS cannot be used for testing.
> > It'd be better to be implemented if upcoming real users care rather than
> > leave untested dead code around.
> >
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Darrick J. Wong <djwong@kernel.org>
> > Cc: Matthew Wilcox <willy@infradead.org>
> > Cc: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
> > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > ---
> > v2: https://lore.kernel.org/r/YPLdSja%2F4FBsjss%2F@B-P7TQMD6M-0146.local/
> > changes since v2:
> >  - update suggestion from Christoph:
> >     https://lore.kernel.org/r/YPVe41YqpfGLNsBS@infradead.org/
> >
> > Hi Andreas,
> > would you mind test on the gfs2 side? Thanks in advance!
> >
> > Thanks,
> > Gao Xiang
> >
> >  fs/iomap/buffered-io.c | 50 ++++++++++++++++++++++++++----------------
> >  fs/iomap/direct-io.c   | 11 ++++++----
> >  2 files changed, 38 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 87ccb3438bec..cac8a88660d8 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -207,23 +207,22 @@ struct iomap_readpage_ctx {
> >
> >  static void
> >  iomap_read_inline_data(struct inode *inode, struct page *page,
> > -               struct iomap *iomap)
> > +               struct iomap *iomap, loff_t pos)
> >  {
> > -       size_t size = i_size_read(inode);
> > +       unsigned int size, poff = offset_in_page(pos);
> >         void *addr;
> >
> > -       if (PageUptodate(page))
> > -               return;
> > -
> > -       BUG_ON(page_has_private(page));
> > -       BUG_ON(page->index);
> > -       BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +       /* inline source data must be inside a single page */
> > +       BUG_ON(iomap->length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +       /* handle tail-packing blocks cross the current page into the next */
> > +       size = min_t(unsigned int, iomap->length + pos - iomap->offset,
> > +                    PAGE_SIZE - poff);
> >
> >         addr = kmap_atomic(page);
> > -       memcpy(addr, iomap->inline_data, size);
> > -       memset(addr + size, 0, PAGE_SIZE - size);
> > +       memcpy(addr + poff, iomap->inline_data - iomap->offset + pos, size);
> > +       memset(addr + poff + size, 0, PAGE_SIZE - poff - size);
> >         kunmap_atomic(addr);
> > -       SetPageUptodate(page);
> > +       iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
> >  }
> >
> >  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> > @@ -246,18 +245,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >         unsigned poff, plen;
> >         sector_t sector;
> >
> > -       if (iomap->type == IOMAP_INLINE) {
> > -               WARN_ON_ONCE(pos);
> > -               iomap_read_inline_data(inode, page, iomap);
> > -               return PAGE_SIZE;
> > -       }
> > -
> > -       /* zero post-eof blocks as the page may be mapped */
> >         iop = iomap_page_create(inode, page);
> 
> We can skip creating the iop when reading the entire page.

As I said before, I think it can be in a separated patch like
https://lore.kernel.org/r/YPMkKfegS+9KzEhK@casper.infradead.org/
and Christoph said it should be careful:
https://lore.kernel.org/r/YPVfxn6%2FoCPBZpKu@infradead.org/

Thanks,
Gao Xiang

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-07-20 12:24 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-19 14:47 [PATCH v3] iomap: support tail packing inline read Gao Xiang
2021-07-19 15:02 ` Matthew Wilcox
2021-07-19 15:13   ` Christoph Hellwig
2021-07-19 16:11     ` Gao Xiang
2021-07-19 16:31       ` Matthew Wilcox
2021-07-19 16:45         ` Gao Xiang
2021-07-19 15:29   ` Gao Xiang
2021-07-19 15:31     ` Andreas Grünbacher
2021-07-19 15:36       ` Gao Xiang
2021-07-20 11:23 ` Andreas Grünbacher
2021-07-20 12:10   ` Gao Xiang
2021-07-20 11:34 ` Andreas Grünbacher
2021-07-20 12:23   ` Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).