linux-erofs.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v8] iomap: make inline data support more flexible
@ 2021-07-26 14:57 Gao Xiang
  2021-07-26 14:58 ` Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Gao Xiang @ 2021-07-26 14:57 UTC (permalink / raw)
  To: linux-erofs, linux-fsdevel
  Cc: Andreas Gruenbacher, Darrick J . Wong, LKML, Matthew Wilcox,
	Joseph Qi, Christoph Hellwig

The existing inline data support only works for cases where the entire
file is stored as inline data.  For larger files, EROFS stores the
initial blocks separately and then can pack a small tail adjacent to the
inode.  Generalise inline data to allow for tail packing.  Tails may not
cross a page boundary in memory.

We currently have no filesystems that support tail packing and writing,
so that case is currently disabled (see iomap_write_begin_inline).

Cc: Christoph Hellwig <hch@lst.de>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
v7: https://lore.kernel.org/r/20210723174131.180813-1-hsiangkao@linux.alibaba.com
changes since v7:
 - This version is based on Andreas's patch, the main difference
   is to avoid using "iomap->length" in iomap_read_inline_data().
   more details see:
    https://lore.kernel.org/r/CAHpGcMJhuSApy4eg9jKe2pYq4d7bY-Lg-Bmo9tOANghQ2Hxo-A@mail.gmail.com
   The rest are similar (some renaming and return type changes.)

 - with update according to Christoph's comments:
   https://lore.kernel.org/r/20210726121702.GA528@lst.de/
   except that "
    I think we should fix that now that we have the srcmap concept.
    That is or IOMAP_WRITE|IOMAP_ZERO return the inline map as the
    soure map, and return the actual block map we plan to write into
    as the main iomap. "
   Hopefully it could be addressed with a new gfs2-related patch.

 - it passes gfs2 fstests and no strange on my side.

Hopefully I don't miss anything (already many inputs), and everyone
is happy with this version.

 fs/iomap/buffered-io.c | 40 ++++++++++++++++++++++++++++------------
 fs/iomap/direct-io.c   | 10 ++++++----
 include/linux/iomap.h  | 18 ++++++++++++++++++
 3 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 87ccb3438bec..0d9f161ecb7e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -205,25 +205,30 @@ struct iomap_readpage_ctx {
 	struct readahead_control *rac;
 };
 
-static void
-iomap_read_inline_data(struct inode *inode, struct page *page,
+static int iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
 {
-	size_t size = i_size_read(inode);
+	size_t size = i_size_read(inode) - iomap->offset;
 	void *addr;
 
 	if (PageUptodate(page))
-		return;
+		return 0;
 
-	BUG_ON(page_has_private(page));
-	BUG_ON(page->index);
-	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/* inline data must start page aligned in the file */
+	if (WARN_ON_ONCE(offset_in_page(iomap->offset)))
+		return -EIO;
+	if (WARN_ON_ONCE(size > PAGE_SIZE -
+			 offset_in_page(iomap->inline_data)))
+		return -EIO;
+	if (WARN_ON_ONCE(page_has_private(page)))
+		return -EIO;
 
 	addr = kmap_atomic(page);
 	memcpy(addr, iomap->inline_data, size);
 	memset(addr + size, 0, PAGE_SIZE - size);
 	kunmap_atomic(addr);
 	SetPageUptodate(page);
+	return 0;
 }
 
 static inline bool iomap_block_needs_zeroing(struct inode *inode,
@@ -247,8 +252,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	sector_t sector;
 
 	if (iomap->type == IOMAP_INLINE) {
-		WARN_ON_ONCE(pos);
-		iomap_read_inline_data(inode, page, iomap);
+		int ret = iomap_read_inline_data(inode, page, iomap);
+
+		if (ret)
+			return ret;
 		return PAGE_SIZE;
 	}
 
@@ -589,6 +596,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 	return 0;
 }
 
+static int iomap_write_begin_inline(struct inode *inode,
+		struct page *page, struct iomap *srcmap)
+{
+	/* needs more work for the tailpacking case, disable for now */
+	if (WARN_ON_ONCE(srcmap->offset != 0))
+		return -EIO;
+	return iomap_read_inline_data(inode, page, srcmap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
@@ -618,7 +634,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	}
 
 	if (srcmap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, srcmap);
+		status = iomap_write_begin_inline(inode, page, srcmap);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
@@ -671,11 +687,11 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
 	void *addr;
 
 	WARN_ON_ONCE(!PageUptodate(page));
-	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	BUG_ON(!iomap_inline_data_valid(iomap));
 
 	flush_dcache_page(page);
 	addr = kmap_atomic(page);
-	memcpy(iomap->inline_data + pos, addr + pos, copied);
+	memcpy(iomap_inline_data(iomap, pos), addr + pos, copied);
 	kunmap_atomic(addr);
 
 	mark_inode_dirty(inode);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9398b8c31323..41ccbfc9dc82 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 		struct iomap_dio *dio, struct iomap *iomap)
 {
 	struct iov_iter *iter = dio->submit.iter;
+	void *inline_data = iomap_inline_data(iomap, pos);
 	size_t copied;
 
-	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
+		return -EIO;
 
 	if (dio->flags & IOMAP_DIO_WRITE) {
 		loff_t size = inode->i_size;
 
 		if (pos > size)
-			memset(iomap->inline_data + size, 0, pos - size);
-		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+			memset(iomap_inline_data(iomap, size), 0, pos - size);
+		copied = copy_from_iter(inline_data, length, iter);
 		if (copied) {
 			if (pos + copied > size)
 				i_size_write(inode, pos + copied);
 			mark_inode_dirty(inode);
 		}
 	} else {
-		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+		copied = copy_to_iter(inline_data, length, iter);
 	}
 	dio->size += copied;
 	return copied;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 479c1da3e221..b8ec145b2975 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -97,6 +97,24 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+/*
+ * Returns the inline data pointer for logical offset @pos.
+ */
+static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos)
+{
+	return iomap->inline_data + pos - iomap->offset;
+}
+
+/*
+ * Check if the mapping's length is within the valid range for inline data.
+ * This is used to guard against accessing data beyond the page inline_data
+ * points at.
+ */
+static inline bool iomap_inline_data_valid(struct iomap *iomap)
+{
+	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
+}
+
 /*
  * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
  * and page_done will be called for each page written to.  This only applies to
-- 
2.24.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v8] iomap: make inline data support more flexible
  2021-07-26 14:57 [PATCH v8] iomap: make inline data support more flexible Gao Xiang
@ 2021-07-26 14:58 ` Christoph Hellwig
  2021-07-26 18:19   ` Darrick J. Wong
  2021-07-26 15:06 ` Matthew Wilcox
  2021-07-26 22:10 ` Darrick J. Wong
  2 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2021-07-26 14:58 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Andreas Gruenbacher, Darrick J . Wong, LKML, Matthew Wilcox,
	Joseph Qi, linux-fsdevel, linux-erofs, Christoph Hellwig

Looks good to me:

Signed-off-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v8] iomap: make inline data support more flexible
  2021-07-26 14:57 [PATCH v8] iomap: make inline data support more flexible Gao Xiang
  2021-07-26 14:58 ` Christoph Hellwig
@ 2021-07-26 15:06 ` Matthew Wilcox
  2021-07-26 15:15   ` Gao Xiang
  2021-07-26 22:10 ` Darrick J. Wong
  2 siblings, 1 reply; 8+ messages in thread
From: Matthew Wilcox @ 2021-07-26 15:06 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Andreas Gruenbacher, Darrick J . Wong, LKML, Joseph Qi,
	linux-fsdevel, linux-erofs, Christoph Hellwig


Please make the Subject: 'iomap: Support file tail packing' as there
are clearly a number of ways to make the inline data support more
flexible ;-)

Other than that:

Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v8] iomap: make inline data support more flexible
  2021-07-26 15:06 ` Matthew Wilcox
@ 2021-07-26 15:15   ` Gao Xiang
  0 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang @ 2021-07-26 15:15 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andreas Gruenbacher, Darrick J . Wong, LKML, Joseph Qi,
	linux-fsdevel, linux-erofs, Christoph Hellwig

Hi Matthew,

On Mon, Jul 26, 2021 at 04:06:47PM +0100, Matthew Wilcox wrote:
> 
> Please make the Subject: 'iomap: Support file tail packing' as there
> are clearly a number of ways to make the inline data support more
> flexible ;-)

Many thank all folks for the time and the review!

If Darrick is happy too, maybe leave him to update :-) 
(...I'm fear of screwing up anything now...)

> 
> Other than that:
> 
> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Thanks,
Gao Xiang


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v8] iomap: make inline data support more flexible
  2021-07-26 14:58 ` Christoph Hellwig
@ 2021-07-26 18:19   ` Darrick J. Wong
  2021-07-26 18:24     ` Christoph Hellwig
  0 siblings, 1 reply; 8+ messages in thread
From: Darrick J. Wong @ 2021-07-26 18:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andreas Gruenbacher, LKML, Matthew Wilcox, Joseph Qi,
	linux-fsdevel, linux-erofs

On Mon, Jul 26, 2021 at 04:58:58PM +0200, Christoph Hellwig wrote:
> Looks good to me:
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Uh, did you mean RVB here?

--D

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v8] iomap: make inline data support more flexible
  2021-07-26 18:19   ` Darrick J. Wong
@ 2021-07-26 18:24     ` Christoph Hellwig
  0 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2021-07-26 18:24 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Andreas Gruenbacher, LKML, Matthew Wilcox, Joseph Qi,
	linux-fsdevel, linux-erofs, Christoph Hellwig

On Mon, Jul 26, 2021 at 11:19:25AM -0700, Darrick J. Wong wrote:
> On Mon, Jul 26, 2021 at 04:58:58PM +0200, Christoph Hellwig wrote:
> > Looks good to me:
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Uh, did you mean RVB here?

Yes:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v8] iomap: make inline data support more flexible
  2021-07-26 14:57 [PATCH v8] iomap: make inline data support more flexible Gao Xiang
  2021-07-26 14:58 ` Christoph Hellwig
  2021-07-26 15:06 ` Matthew Wilcox
@ 2021-07-26 22:10 ` Darrick J. Wong
  2021-07-27  2:19   ` Gao Xiang
  2 siblings, 1 reply; 8+ messages in thread
From: Darrick J. Wong @ 2021-07-26 22:10 UTC (permalink / raw)
  To: Gao Xiang
  Cc: Andreas Gruenbacher, LKML, Matthew Wilcox, Joseph Qi,
	linux-fsdevel, linux-erofs, Christoph Hellwig

On Mon, Jul 26, 2021 at 10:57:34PM +0800, Gao Xiang wrote:
> The existing inline data support only works for cases where the entire
> file is stored as inline data.  For larger files, EROFS stores the
> initial blocks separately and then can pack a small tail adjacent to the
> inode.  Generalise inline data to allow for tail packing.  Tails may not
> cross a page boundary in memory.
> 
> We currently have no filesystems that support tail packing and writing,
> so that case is currently disabled (see iomap_write_begin_inline).
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Darrick J. Wong <djwong@kernel.org>
> Cc: Matthew Wilcox <willy@infradead.org>
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
> v7: https://lore.kernel.org/r/20210723174131.180813-1-hsiangkao@linux.alibaba.com
> changes since v7:
>  - This version is based on Andreas's patch, the main difference
>    is to avoid using "iomap->length" in iomap_read_inline_data().
>    more details see:
>     https://lore.kernel.org/r/CAHpGcMJhuSApy4eg9jKe2pYq4d7bY-Lg-Bmo9tOANghQ2Hxo-A@mail.gmail.com
>    The rest are similar (some renaming and return type changes.)
> 
>  - with update according to Christoph's comments:
>    https://lore.kernel.org/r/20210726121702.GA528@lst.de/
>    except that "
>     I think we should fix that now that we have the srcmap concept.
>     That is or IOMAP_WRITE|IOMAP_ZERO return the inline map as the
>     soure map, and return the actual block map we plan to write into
>     as the main iomap. "
>    Hopefully it could be addressed with a new gfs2-related patch.
> 
>  - it passes gfs2 fstests and no strange on my side.
> 
> Hopefully I don't miss anything (already many inputs), and everyone
> is happy with this version.
> 
>  fs/iomap/buffered-io.c | 40 ++++++++++++++++++++++++++++------------
>  fs/iomap/direct-io.c   | 10 ++++++----
>  include/linux/iomap.h  | 18 ++++++++++++++++++
>  3 files changed, 52 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 87ccb3438bec..0d9f161ecb7e 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -205,25 +205,30 @@ struct iomap_readpage_ctx {
>  	struct readahead_control *rac;
>  };
>  
> -static void
> -iomap_read_inline_data(struct inode *inode, struct page *page,
> +static int iomap_read_inline_data(struct inode *inode, struct page *page,
>  		struct iomap *iomap)
>  {
> -	size_t size = i_size_read(inode);
> +	size_t size = i_size_read(inode) - iomap->offset;
>  	void *addr;
>  
>  	if (PageUptodate(page))
> -		return;
> +		return 0;
>  
> -	BUG_ON(page_has_private(page));
> -	BUG_ON(page->index);
> -	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	/* inline data must start page aligned in the file */
> +	if (WARN_ON_ONCE(offset_in_page(iomap->offset)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(size > PAGE_SIZE -
> +			 offset_in_page(iomap->inline_data)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(page_has_private(page)))
> +		return -EIO;
>  
>  	addr = kmap_atomic(page);
>  	memcpy(addr, iomap->inline_data, size);
>  	memset(addr + size, 0, PAGE_SIZE - size);
>  	kunmap_atomic(addr);
>  	SetPageUptodate(page);
> +	return 0;

As I muttered in the v7 thread, I don't really like how this function
gets away from using iomap->length for the copy length, unlike the other
iomap read paths.  I started sketching out how I'd really like the
function to read and ended up with:

static int iomap_read_inline_data(struct inode *inode, struct page *page,
			struct iomap *iomap)
{
	void *addr;
	loff_t isize = i_size_read(inode);
	loff_t ret;
	unsigned int plen = min(isize - iomap->offset, iomap->length);

	/* inline data must start page aligned in the file */
	if (WARN_ON_ONCE(offset_in_page(iomap->offset)))
		return -EIO;
	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
		return -EIO;
	if (WARN_ON_ONCE(page_has_private(page)))
		return -EIO;

	addr = kmap_atomic(page);
	memcpy(addr, iomap->inline_data, plen);
	if (iomap->offset + plen == isize) {
		/* If we reach EOF, we can zero the rest of the page */
		memset(addr + plen, 0, PAGE_SIZE - plen);
		plen = PAGE_SIZE;
	}

	if (offset_in_page(iomap->offset) == 0 && plen == PAGE_SIZE) {
		SetPageUptodate(page);
	} else {
		iomap_page_create(inode, page);
		iomap_set_range_uptodate(page,
				offset_in_page(iomap->offset), plen);
	}
	kunmap_atomic(addr);
	return plen;
}

But then my brain filled up with all the other potential case I'd have
to support in order to do this properly, and decided that this patch,
while retaining some grossness, isn't really any worse that what we have
now.

I think I /would/ like to request a V9 with one extra safety check,
however:

	if (WARN_ON_ONCE(size > iomap->length))
		return -EIO;

Add that one sanity check and I think I'm willing to throw this on the
pile for 5.15.

--D

>  }
>  
>  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> @@ -247,8 +252,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	sector_t sector;
>  
>  	if (iomap->type == IOMAP_INLINE) {
> -		WARN_ON_ONCE(pos);
> -		iomap_read_inline_data(inode, page, iomap);
> +		int ret = iomap_read_inline_data(inode, page, iomap);
> +
> +		if (ret)
> +			return ret;
>  		return PAGE_SIZE;
>  	}
>  
> @@ -589,6 +596,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  	return 0;
>  }
>  
> +static int iomap_write_begin_inline(struct inode *inode,
> +		struct page *page, struct iomap *srcmap)
> +{
> +	/* needs more work for the tailpacking case, disable for now */
> +	if (WARN_ON_ONCE(srcmap->offset != 0))
> +		return -EIO;
> +	return iomap_read_inline_data(inode, page, srcmap);
> +}
> +
>  static int
>  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> @@ -618,7 +634,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  	}
>  
>  	if (srcmap->type == IOMAP_INLINE)
> -		iomap_read_inline_data(inode, page, srcmap);
> +		status = iomap_write_begin_inline(inode, page, srcmap);
>  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
>  		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>  	else
> @@ -671,11 +687,11 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
>  	void *addr;
>  
>  	WARN_ON_ONCE(!PageUptodate(page));
> -	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	BUG_ON(!iomap_inline_data_valid(iomap));
>  
>  	flush_dcache_page(page);
>  	addr = kmap_atomic(page);
> -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> +	memcpy(iomap_inline_data(iomap, pos), addr + pos, copied);
>  	kunmap_atomic(addr);
>  
>  	mark_inode_dirty(inode);
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 9398b8c31323..41ccbfc9dc82 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  		struct iomap_dio *dio, struct iomap *iomap)
>  {
>  	struct iov_iter *iter = dio->submit.iter;
> +	void *inline_data = iomap_inline_data(iomap, pos);
>  	size_t copied;
>  
> -	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
> +		return -EIO;
>  
>  	if (dio->flags & IOMAP_DIO_WRITE) {
>  		loff_t size = inode->i_size;
>  
>  		if (pos > size)
> -			memset(iomap->inline_data + size, 0, pos - size);
> -		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> +			memset(iomap_inline_data(iomap, size), 0, pos - size);
> +		copied = copy_from_iter(inline_data, length, iter);
>  		if (copied) {
>  			if (pos + copied > size)
>  				i_size_write(inode, pos + copied);
>  			mark_inode_dirty(inode);
>  		}
>  	} else {
> -		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> +		copied = copy_to_iter(inline_data, length, iter);
>  	}
>  	dio->size += copied;
>  	return copied;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 479c1da3e221..b8ec145b2975 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -97,6 +97,24 @@ iomap_sector(struct iomap *iomap, loff_t pos)
>  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>  }
>  
> +/*
> + * Returns the inline data pointer for logical offset @pos.
> + */
> +static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos)
> +{
> +	return iomap->inline_data + pos - iomap->offset;
> +}
> +
> +/*
> + * Check if the mapping's length is within the valid range for inline data.
> + * This is used to guard against accessing data beyond the page inline_data
> + * points at.
> + */
> +static inline bool iomap_inline_data_valid(struct iomap *iomap)
> +{
> +	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
> +}
> +
>  /*
>   * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
>   * and page_done will be called for each page written to.  This only applies to
> -- 
> 2.24.4
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v8] iomap: make inline data support more flexible
  2021-07-26 22:10 ` Darrick J. Wong
@ 2021-07-27  2:19   ` Gao Xiang
  0 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang @ 2021-07-27  2:19 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Andreas Gruenbacher, LKML, Matthew Wilcox, Joseph Qi,
	linux-fsdevel, linux-erofs, Christoph Hellwig

Hi Darrick,

On Mon, Jul 26, 2021 at 03:10:54PM -0700, Darrick J. Wong wrote:
> On Mon, Jul 26, 2021 at 10:57:34PM +0800, Gao Xiang wrote:
> > The existing inline data support only works for cases where the entire
> > file is stored as inline data.  For larger files, EROFS stores the
> > initial blocks separately and then can pack a small tail adjacent to the
> > inode.  Generalise inline data to allow for tail packing.  Tails may not
> > cross a page boundary in memory.
> > 
> > We currently have no filesystems that support tail packing and writing,
> > so that case is currently disabled (see iomap_write_begin_inline).
> > 
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Darrick J. Wong <djwong@kernel.org>
> > Cc: Matthew Wilcox <willy@infradead.org>
> > Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > ---
> > v7: https://lore.kernel.org/r/20210723174131.180813-1-hsiangkao@linux.alibaba.com
> > changes since v7:
> >  - This version is based on Andreas's patch, the main difference
> >    is to avoid using "iomap->length" in iomap_read_inline_data().
> >    more details see:
> >     https://lore.kernel.org/r/CAHpGcMJhuSApy4eg9jKe2pYq4d7bY-Lg-Bmo9tOANghQ2Hxo-A@mail.gmail.com
> >    The rest are similar (some renaming and return type changes.)
> > 
> >  - with update according to Christoph's comments:
> >    https://lore.kernel.org/r/20210726121702.GA528@lst.de/
> >    except that "
> >     I think we should fix that now that we have the srcmap concept.
> >     That is or IOMAP_WRITE|IOMAP_ZERO return the inline map as the
> >     soure map, and return the actual block map we plan to write into
> >     as the main iomap. "
> >    Hopefully it could be addressed with a new gfs2-related patch.
> > 
> >  - it passes gfs2 fstests and no strange on my side.
> > 
> > Hopefully I don't miss anything (already many inputs), and everyone
> > is happy with this version.
> > 
> >  fs/iomap/buffered-io.c | 40 ++++++++++++++++++++++++++++------------
> >  fs/iomap/direct-io.c   | 10 ++++++----
> >  include/linux/iomap.h  | 18 ++++++++++++++++++
> >  3 files changed, 52 insertions(+), 16 deletions(-)
> > 
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 87ccb3438bec..0d9f161ecb7e 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -205,25 +205,30 @@ struct iomap_readpage_ctx {
> >  	struct readahead_control *rac;
> >  };
> >  
> > -static void
> > -iomap_read_inline_data(struct inode *inode, struct page *page,
> > +static int iomap_read_inline_data(struct inode *inode, struct page *page,
> >  		struct iomap *iomap)
> >  {
> > -	size_t size = i_size_read(inode);
> > +	size_t size = i_size_read(inode) - iomap->offset;
> >  	void *addr;
> >  
> >  	if (PageUptodate(page))
> > -		return;
> > +		return 0;
> >  
> > -	BUG_ON(page_has_private(page));
> > -	BUG_ON(page->index);
> > -	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +	/* inline data must start page aligned in the file */
> > +	if (WARN_ON_ONCE(offset_in_page(iomap->offset)))
> > +		return -EIO;
> > +	if (WARN_ON_ONCE(size > PAGE_SIZE -
> > +			 offset_in_page(iomap->inline_data)))
> > +		return -EIO;
> > +	if (WARN_ON_ONCE(page_has_private(page)))
> > +		return -EIO;
> >  
> >  	addr = kmap_atomic(page);
> >  	memcpy(addr, iomap->inline_data, size);
> >  	memset(addr + size, 0, PAGE_SIZE - size);
> >  	kunmap_atomic(addr);
> >  	SetPageUptodate(page);
> > +	return 0;
> 
> As I muttered in the v7 thread, I don't really like how this function
> gets away from using iomap->length for the copy length, unlike the other
> iomap read paths.  I started sketching out how I'd really like the
> function to read and ended up with:
> 
> static int iomap_read_inline_data(struct inode *inode, struct page *page,
> 			struct iomap *iomap)
> {
> 	void *addr;
> 	loff_t isize = i_size_read(inode);
> 	loff_t ret;
> 	unsigned int plen = min(isize - iomap->offset, iomap->length);
> 
> 	/* inline data must start page aligned in the file */
> 	if (WARN_ON_ONCE(offset_in_page(iomap->offset)))
> 		return -EIO;
> 	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
> 		return -EIO;
> 	if (WARN_ON_ONCE(page_has_private(page)))
> 		return -EIO;
> 
> 	addr = kmap_atomic(page);
> 	memcpy(addr, iomap->inline_data, plen);
> 	if (iomap->offset + plen == isize) {
> 		/* If we reach EOF, we can zero the rest of the page */
> 		memset(addr + plen, 0, PAGE_SIZE - plen);
> 		plen = PAGE_SIZE;
> 	}
> 
> 	if (offset_in_page(iomap->offset) == 0 && plen == PAGE_SIZE) {
> 		SetPageUptodate(page);
> 	} else {
> 		iomap_page_create(inode, page);
> 		iomap_set_range_uptodate(page,
> 				offset_in_page(iomap->offset), plen);

If we finally do like this, I think `plen' here needs to be block-aligned
like my previous patches.

Another point is currently these code has no use cases, so let's do the
minimal thing first. (I could try this when working on subpage-sized
block support.)

> 	}
> 	kunmap_atomic(addr);
> 	return plen;
> }
> 
> But then my brain filled up with all the other potential case I'd have
> to support in order to do this properly, and decided that this patch,
> while retaining some grossness, isn't really any worse that what we have
> now.

Yeah.

> 
> I think I /would/ like to request a V9 with one extra safety check,
> however:
> 
> 	if (WARN_ON_ONCE(size > iomap->length))
> 		return -EIO;
> 
> Add that one sanity check and I think I'm willing to throw this on the
> pile for 5.15.

Okay, will update.

Thanks,
Gao Xiang

> 
> --D
> 
> >  }
> >  
> >  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> > @@ -247,8 +252,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >  	sector_t sector;
> >  
> >  	if (iomap->type == IOMAP_INLINE) {
> > -		WARN_ON_ONCE(pos);
> > -		iomap_read_inline_data(inode, page, iomap);
> > +		int ret = iomap_read_inline_data(inode, page, iomap);
> > +
> > +		if (ret)
> > +			return ret;
> >  		return PAGE_SIZE;
> >  	}
> >  
> > @@ -589,6 +596,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> >  	return 0;
> >  }
> >  
> > +static int iomap_write_begin_inline(struct inode *inode,
> > +		struct page *page, struct iomap *srcmap)
> > +{
> > +	/* needs more work for the tailpacking case, disable for now */
> > +	if (WARN_ON_ONCE(srcmap->offset != 0))
> > +		return -EIO;
> > +	return iomap_read_inline_data(inode, page, srcmap);
> > +}
> > +
> >  static int
> >  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> >  		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> > @@ -618,7 +634,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> >  	}
> >  
> >  	if (srcmap->type == IOMAP_INLINE)
> > -		iomap_read_inline_data(inode, page, srcmap);
> > +		status = iomap_write_begin_inline(inode, page, srcmap);
> >  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
> >  		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
> >  	else
> > @@ -671,11 +687,11 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
> >  	void *addr;
> >  
> >  	WARN_ON_ONCE(!PageUptodate(page));
> > -	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +	BUG_ON(!iomap_inline_data_valid(iomap));
> >  
> >  	flush_dcache_page(page);
> >  	addr = kmap_atomic(page);
> > -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> > +	memcpy(iomap_inline_data(iomap, pos), addr + pos, copied);
> >  	kunmap_atomic(addr);
> >  
> >  	mark_inode_dirty(inode);
> > diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> > index 9398b8c31323..41ccbfc9dc82 100644
> > --- a/fs/iomap/direct-io.c
> > +++ b/fs/iomap/direct-io.c
> > @@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
> >  		struct iomap_dio *dio, struct iomap *iomap)
> >  {
> >  	struct iov_iter *iter = dio->submit.iter;
> > +	void *inline_data = iomap_inline_data(iomap, pos);
> >  	size_t copied;
> >  
> > -	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +	if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
> > +		return -EIO;
> >  
> >  	if (dio->flags & IOMAP_DIO_WRITE) {
> >  		loff_t size = inode->i_size;
> >  
> >  		if (pos > size)
> > -			memset(iomap->inline_data + size, 0, pos - size);
> > -		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> > +			memset(iomap_inline_data(iomap, size), 0, pos - size);
> > +		copied = copy_from_iter(inline_data, length, iter);
> >  		if (copied) {
> >  			if (pos + copied > size)
> >  				i_size_write(inode, pos + copied);
> >  			mark_inode_dirty(inode);
> >  		}
> >  	} else {
> > -		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> > +		copied = copy_to_iter(inline_data, length, iter);
> >  	}
> >  	dio->size += copied;
> >  	return copied;
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index 479c1da3e221..b8ec145b2975 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -97,6 +97,24 @@ iomap_sector(struct iomap *iomap, loff_t pos)
> >  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
> >  }
> >  
> > +/*
> > + * Returns the inline data pointer for logical offset @pos.
> > + */
> > +static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos)
> > +{
> > +	return iomap->inline_data + pos - iomap->offset;
> > +}
> > +
> > +/*
> > + * Check if the mapping's length is within the valid range for inline data.
> > + * This is used to guard against accessing data beyond the page inline_data
> > + * points at.
> > + */
> > +static inline bool iomap_inline_data_valid(struct iomap *iomap)
> > +{
> > +	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
> > +}
> > +
> >  /*
> >   * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
> >   * and page_done will be called for each page written to.  This only applies to
> > -- 
> > 2.24.4
> > 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-07-27  2:19 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-26 14:57 [PATCH v8] iomap: make inline data support more flexible Gao Xiang
2021-07-26 14:58 ` Christoph Hellwig
2021-07-26 18:19   ` Darrick J. Wong
2021-07-26 18:24     ` Christoph Hellwig
2021-07-26 15:06 ` Matthew Wilcox
2021-07-26 15:15   ` Gao Xiang
2021-07-26 22:10 ` Darrick J. Wong
2021-07-27  2:19   ` Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).