All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fs: reduce pointers while using file_ra_state_init()
@ 2021-07-26 16:46 Goldwyn Rodrigues
  2021-07-26 17:19 ` Matthew Wilcox
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Goldwyn Rodrigues @ 2021-07-26 16:46 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-btrfs, linux-fscrypt, linux-nfs

Simplification.

file_ra_state_init() take struct address_space *, just to use inode
pointer by dereferencing from mapping->host.

The callers also derive mapping either by file->f_mapping, or
even file->f_mapping->host->i_mapping.

Change file_ra_state_init() to accept struct inode * to reduce pointer
dereferencing, both in the callee and the caller.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/btrfs/free-space-cache.c | 2 +-
 fs/btrfs/ioctl.c            | 2 +-
 fs/btrfs/relocation.c       | 2 +-
 fs/btrfs/send.c             | 2 +-
 fs/nfs/nfs4file.c           | 2 +-
 fs/open.c                   | 2 +-
 fs/verity/enable.c          | 2 +-
 include/linux/fs.h          | 2 +-
 mm/readahead.c              | 4 ++--
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4806295116d8..c43bf9915cda 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -351,7 +351,7 @@ static void readahead_cache(struct inode *inode)
 	if (!ra)
 		return;
 
-	file_ra_state_init(ra, inode->i_mapping);
+	file_ra_state_init(ra, inode);
 	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
 
 	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5dc2fd843ae3..b3508887d466 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1399,7 +1399,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 	if (!file) {
 		ra = kzalloc(sizeof(*ra), GFP_KERNEL);
 		if (ra)
-			file_ra_state_init(ra, inode->i_mapping);
+			file_ra_state_init(ra, inode);
 	} else {
 		ra = &file->f_ra;
 	}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b70be2ac2e9e..4f35672b93a5 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2911,7 +2911,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
 	if (ret)
 		goto out;
 
-	file_ra_state_init(ra, inode->i_mapping);
+	file_ra_state_init(ra, inode);
 
 	ret = setup_extent_mapping(inode, cluster->start - offset,
 				   cluster->end - offset, cluster->start);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index bd69db72acc5..3eb8d2277a3d 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4949,7 +4949,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
 
 	/* initial readahead */
 	memset(&sctx->ra, 0, sizeof(struct file_ra_state));
-	file_ra_state_init(&sctx->ra, inode->i_mapping);
+	file_ra_state_init(&sctx->ra, inode);
 
 	while (index <= last_index) {
 		unsigned cur_len = min_t(unsigned, len,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index a1e5c6b85ded..c810a6151c93 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -385,7 +385,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
 	nfs_file_set_open_context(filep, ctx);
 	put_nfs_open_context(ctx);
 
-	file_ra_state_init(&filep->f_ra, filep->f_mapping->host->i_mapping);
+	file_ra_state_init(&filep->f_ra, file_inode(filep));
 	res = filep;
 out_free_name:
 	kfree(read_name);
diff --git a/fs/open.c b/fs/open.c
index e53af13b5835..9c6773a4fb30 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -840,7 +840,7 @@ static int do_dentry_open(struct file *f,
 	f->f_write_hint = WRITE_LIFE_NOT_SET;
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
-	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+	file_ra_state_init(&f->f_ra, inode);
 
 	/* NB: we're sure to have correct a_ops only after f_op->open */
 	if (f->f_flags & O_DIRECT) {
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index 77e159a0346b..460d881080ac 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -66,7 +66,7 @@ static int build_merkle_tree_level(struct file *filp, unsigned int level,
 		dst_block_num = 0; /* unused */
 	}
 
-	file_ra_state_init(&ra, filp->f_mapping);
+	file_ra_state_init(&ra, inode);
 
 	for (i = 0; i < num_blocks_to_hash; i++) {
 		struct page *src_page;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..3b8ce0221477 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3260,7 +3260,7 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 
 extern void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+file_ra_state_init(struct file_ra_state *ra, struct inode *inode);
 extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
diff --git a/mm/readahead.c b/mm/readahead.c
index d589f147f4c2..3541941df5e7 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -31,9 +31,9 @@
  * memset *ra to zero.
  */
 void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
+file_ra_state_init(struct file_ra_state *ra, struct inode *inode)
 {
-	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
+	ra->ra_pages = inode_to_bdi(inode)->ra_pages;
 	ra->prev_pos = -1;
 }
 EXPORT_SYMBOL_GPL(file_ra_state_init);
-- 
2.32.0


-- 
Goldwyn

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-26 16:46 [PATCH] fs: reduce pointers while using file_ra_state_init() Goldwyn Rodrigues
@ 2021-07-26 17:19 ` Matthew Wilcox
  2021-07-29 16:08   ` Goldwyn Rodrigues
  2021-07-26 22:06 ` NeilBrown
  2021-07-29  4:58 ` NeilBrown
  2 siblings, 1 reply; 10+ messages in thread
From: Matthew Wilcox @ 2021-07-26 17:19 UTC (permalink / raw)
  To: Goldwyn Rodrigues; +Cc: linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On Mon, Jul 26, 2021 at 11:46:47AM -0500, Goldwyn Rodrigues wrote:
> Simplification.
> 
> file_ra_state_init() take struct address_space *, just to use inode
> pointer by dereferencing from mapping->host.
> 
> The callers also derive mapping either by file->f_mapping, or
> even file->f_mapping->host->i_mapping.
> 
> Change file_ra_state_init() to accept struct inode * to reduce pointer
> dereferencing, both in the callee and the caller.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>

Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>

(some adjacent comments)

> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index 4806295116d8..c43bf9915cda 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -351,7 +351,7 @@ static void readahead_cache(struct inode *inode)
>  	if (!ra)
>  		return;
>  
> -	file_ra_state_init(ra, inode->i_mapping);
> +	file_ra_state_init(ra, inode);
>  	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
>  
>  	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

Why does btrfs allocate a file_ra_state using kmalloc instead of
on the stack?

> +++ b/include/linux/fs.h
> @@ -3260,7 +3260,7 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
>  
>  
>  extern void
> -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
> +file_ra_state_init(struct file_ra_state *ra, struct inode *inode);

This should move to pagemap.h (and lose the extern).
I'd put it near the definition of VM_READAHEAD_PAGES.

> diff --git a/mm/readahead.c b/mm/readahead.c
> index d589f147f4c2..3541941df5e7 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -31,9 +31,9 @@
>   * memset *ra to zero.
>   */
>  void
> -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
> +file_ra_state_init(struct file_ra_state *ra, struct inode *inode)
>  {
> -	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
> +	ra->ra_pages = inode_to_bdi(inode)->ra_pages;
>  	ra->prev_pos = -1;
>  }
>  EXPORT_SYMBOL_GPL(file_ra_state_init);

I'm not entirely sure why this function is out-of-line, tbh.
Would it make more sense for it to be static inline in a header?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-26 16:46 [PATCH] fs: reduce pointers while using file_ra_state_init() Goldwyn Rodrigues
  2021-07-26 17:19 ` Matthew Wilcox
@ 2021-07-26 22:06 ` NeilBrown
  2021-07-27  2:05   ` Matthew Wilcox
  2021-07-29  4:58 ` NeilBrown
  2 siblings, 1 reply; 10+ messages in thread
From: NeilBrown @ 2021-07-26 22:06 UTC (permalink / raw)
  To: Goldwyn Rodrigues; +Cc: linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On Tue, 27 Jul 2021, Goldwyn Rodrigues wrote:
> Simplification.
> 
> file_ra_state_init() take struct address_space *, just to use inode
> pointer by dereferencing from mapping->host.
> 
> The callers also derive mapping either by file->f_mapping, or
> even file->f_mapping->host->i_mapping.
> 
> Change file_ra_state_init() to accept struct inode * to reduce pointer
> dereferencing, both in the callee and the caller.

You seem to be assuming that inode->i_mapping->host is always 'inode'.
That is not the case.

In particular, fs/coda/file.c contains

	if (coda_inode->i_mapping == &coda_inode->i_data)
		coda_inode->i_mapping = host_inode->i_mapping;

So a "coda_inode" shares the mapping with a "host_inode".

This is why an inode has both i_data and i_mapping.

So I'm not really sure this patch is safe.  It might break codafs.

But it is more likely that codafs isn't used, doesn't work, should be
removed, and i_data should be renamed to i_mapping.

NeilBrown


> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/btrfs/free-space-cache.c | 2 +-
>  fs/btrfs/ioctl.c            | 2 +-
>  fs/btrfs/relocation.c       | 2 +-
>  fs/btrfs/send.c             | 2 +-
>  fs/nfs/nfs4file.c           | 2 +-
>  fs/open.c                   | 2 +-
>  fs/verity/enable.c          | 2 +-
>  include/linux/fs.h          | 2 +-
>  mm/readahead.c              | 4 ++--
>  9 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index 4806295116d8..c43bf9915cda 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -351,7 +351,7 @@ static void readahead_cache(struct inode *inode)
>  	if (!ra)
>  		return;
>  
> -	file_ra_state_init(ra, inode->i_mapping);
> +	file_ra_state_init(ra, inode);
>  	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
>  
>  	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 5dc2fd843ae3..b3508887d466 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1399,7 +1399,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
>  	if (!file) {
>  		ra = kzalloc(sizeof(*ra), GFP_KERNEL);
>  		if (ra)
> -			file_ra_state_init(ra, inode->i_mapping);
> +			file_ra_state_init(ra, inode);
>  	} else {
>  		ra = &file->f_ra;
>  	}
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index b70be2ac2e9e..4f35672b93a5 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2911,7 +2911,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
>  	if (ret)
>  		goto out;
>  
> -	file_ra_state_init(ra, inode->i_mapping);
> +	file_ra_state_init(ra, inode);
>  
>  	ret = setup_extent_mapping(inode, cluster->start - offset,
>  				   cluster->end - offset, cluster->start);
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index bd69db72acc5..3eb8d2277a3d 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -4949,7 +4949,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
>  
>  	/* initial readahead */
>  	memset(&sctx->ra, 0, sizeof(struct file_ra_state));
> -	file_ra_state_init(&sctx->ra, inode->i_mapping);
> +	file_ra_state_init(&sctx->ra, inode);
>  
>  	while (index <= last_index) {
>  		unsigned cur_len = min_t(unsigned, len,
> diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
> index a1e5c6b85ded..c810a6151c93 100644
> --- a/fs/nfs/nfs4file.c
> +++ b/fs/nfs/nfs4file.c
> @@ -385,7 +385,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
>  	nfs_file_set_open_context(filep, ctx);
>  	put_nfs_open_context(ctx);
>  
> -	file_ra_state_init(&filep->f_ra, filep->f_mapping->host->i_mapping);
> +	file_ra_state_init(&filep->f_ra, file_inode(filep));
>  	res = filep;
>  out_free_name:
>  	kfree(read_name);
> diff --git a/fs/open.c b/fs/open.c
> index e53af13b5835..9c6773a4fb30 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -840,7 +840,7 @@ static int do_dentry_open(struct file *f,
>  	f->f_write_hint = WRITE_LIFE_NOT_SET;
>  	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
>  
> -	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
> +	file_ra_state_init(&f->f_ra, inode);
>  
>  	/* NB: we're sure to have correct a_ops only after f_op->open */
>  	if (f->f_flags & O_DIRECT) {
> diff --git a/fs/verity/enable.c b/fs/verity/enable.c
> index 77e159a0346b..460d881080ac 100644
> --- a/fs/verity/enable.c
> +++ b/fs/verity/enable.c
> @@ -66,7 +66,7 @@ static int build_merkle_tree_level(struct file *filp, unsigned int level,
>  		dst_block_num = 0; /* unused */
>  	}
>  
> -	file_ra_state_init(&ra, filp->f_mapping);
> +	file_ra_state_init(&ra, inode);
>  
>  	for (i = 0; i < num_blocks_to_hash; i++) {
>  		struct page *src_page;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index c3c88fdb9b2a..3b8ce0221477 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3260,7 +3260,7 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
>  
>  
>  extern void
> -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
> +file_ra_state_init(struct file_ra_state *ra, struct inode *inode);
>  extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
>  extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
>  extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
> diff --git a/mm/readahead.c b/mm/readahead.c
> index d589f147f4c2..3541941df5e7 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -31,9 +31,9 @@
>   * memset *ra to zero.
>   */
>  void
> -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
> +file_ra_state_init(struct file_ra_state *ra, struct inode *inode)
>  {
> -	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
> +	ra->ra_pages = inode_to_bdi(inode)->ra_pages;
>  	ra->prev_pos = -1;
>  }
>  EXPORT_SYMBOL_GPL(file_ra_state_init);
> -- 
> 2.32.0
> 
> 
> -- 
> Goldwyn
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-26 22:06 ` NeilBrown
@ 2021-07-27  2:05   ` Matthew Wilcox
  2021-07-27  2:25     ` NeilBrown
  0 siblings, 1 reply; 10+ messages in thread
From: Matthew Wilcox @ 2021-07-27  2:05 UTC (permalink / raw)
  To: NeilBrown
  Cc: Goldwyn Rodrigues, linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On Tue, Jul 27, 2021 at 08:06:21AM +1000, NeilBrown wrote:
> You seem to be assuming that inode->i_mapping->host is always 'inode'.
> That is not the case.

Weeeelllll ... technically, outside of the filesystems that are
changed here, the only assumption in common code that is made is that
inode_to_bdi(inode->i_mapping->host->i_mapping->host) ==
inode_to_bdi(inode)

Looking at inode_to_bdi, that just means that they have the same i_sb.
Which is ... not true for character raw devices?
        if (++raw_devices[minor].inuse == 1)
                file_inode(filp)->i_mapping =
                        bdev->bd_inode->i_mapping;
but then, who's using readahead on a character raw device?  They
force O_DIRECT.  But maybe this should pass inode->i_mapping->host
instead of inode.

> In particular, fs/coda/file.c contains
> 
> 	if (coda_inode->i_mapping == &coda_inode->i_data)
> 		coda_inode->i_mapping = host_inode->i_mapping;
> 
> So a "coda_inode" shares the mapping with a "host_inode".
> 
> This is why an inode has both i_data and i_mapping.
> 
> So I'm not really sure this patch is safe.  It might break codafs.
> 
> But it is more likely that codafs isn't used, doesn't work, should be
> removed, and i_data should be renamed to i_mapping.

I think there's also something unusual going on with either ocfs2
or gfs2.  But yes, I don't understand the rules for when I need to
go from inode->i_mapping->host.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-27  2:05   ` Matthew Wilcox
@ 2021-07-27  2:25     ` NeilBrown
  2021-07-27  2:46       ` Goldwyn Rodrigues
  0 siblings, 1 reply; 10+ messages in thread
From: NeilBrown @ 2021-07-27  2:25 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Goldwyn Rodrigues, linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On Tue, 27 Jul 2021, Matthew Wilcox wrote:
> On Tue, Jul 27, 2021 at 08:06:21AM +1000, NeilBrown wrote:
> > You seem to be assuming that inode->i_mapping->host is always 'inode'.
> > That is not the case.
> 
> Weeeelllll ... technically, outside of the filesystems that are
> changed here, the only assumption in common code that is made is that
> inode_to_bdi(inode->i_mapping->host->i_mapping->host) ==
> inode_to_bdi(inode)

Individual filesystems doing their own thing is fine.  Passing just an
inode to inode_to_bdi is fine.

But the patch changes do_dentry_open()

> 
> Looking at inode_to_bdi, that just means that they have the same i_sb.
> Which is ... not true for character raw devices?
>         if (++raw_devices[minor].inuse == 1)
>                 file_inode(filp)->i_mapping =
>                         bdev->bd_inode->i_mapping;
> but then, who's using readahead on a character raw device?  They
> force O_DIRECT.  But maybe this should pass inode->i_mapping->host
> instead of inode.

Also not true in coda.

coda (for those who don't know) is a network filesystem which fetches
whole files (and often multiple files) at a time (like the Andrew
filesystem).  The files are stored in a local filesystem which acts as a
cache.

So an inode in a 'coda' filesystem access page-cache pages from a file
in e.g. an 'ext4' filesystem.  This is done via the ->i_mapping link.
For (nearly?) all other filesystems, ->i_mapping is a link to ->i_data
in the same inode.

> 
> > In particular, fs/coda/file.c contains
> > 
> > 	if (coda_inode->i_mapping == &coda_inode->i_data)
> > 		coda_inode->i_mapping = host_inode->i_mapping;
> > 
> > So a "coda_inode" shares the mapping with a "host_inode".
> > 
> > This is why an inode has both i_data and i_mapping.
> > 
> > So I'm not really sure this patch is safe.  It might break codafs.
> > 
> > But it is more likely that codafs isn't used, doesn't work, should be
> > removed, and i_data should be renamed to i_mapping.
> 
> I think there's also something unusual going on with either ocfs2
> or gfs2.  But yes, I don't understand the rules for when I need to
> go from inode->i_mapping->host.
> 

Simple.  Whenever you want to work with the page-cache pages, you cannot
assume anything in the original inode is relevant except i_mapping (and
maybe i_size I guess).

NeilBrown

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-27  2:25     ` NeilBrown
@ 2021-07-27  2:46       ` Goldwyn Rodrigues
  2021-07-27  3:49         ` NeilBrown
  0 siblings, 1 reply; 10+ messages in thread
From: Goldwyn Rodrigues @ 2021-07-27  2:46 UTC (permalink / raw)
  To: NeilBrown
  Cc: Matthew Wilcox, linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On 12:25 27/07, NeilBrown wrote:
> On Tue, 27 Jul 2021, Matthew Wilcox wrote:
> > On Tue, Jul 27, 2021 at 08:06:21AM +1000, NeilBrown wrote:
> > > You seem to be assuming that inode->i_mapping->host is always 'inode'.
> > > That is not the case.
> > 
> > Weeeelllll ... technically, outside of the filesystems that are
> > changed here, the only assumption in common code that is made is that
> > inode_to_bdi(inode->i_mapping->host->i_mapping->host) ==
> > inode_to_bdi(inode)
> 
> Individual filesystems doing their own thing is fine.  Passing just an
> inode to inode_to_bdi is fine.
> 
> But the patch changes do_dentry_open()

But do_dentry_open() is setting up the file pointer (f) based on
inode (and it's i_mapping). Can f->f_mapping change within
do_dentry_open()?

> 
> > 
> > Looking at inode_to_bdi, that just means that they have the same i_sb.
> > Which is ... not true for character raw devices?
> >         if (++raw_devices[minor].inuse == 1)
> >                 file_inode(filp)->i_mapping =
> >                         bdev->bd_inode->i_mapping;
> > but then, who's using readahead on a character raw device?  They
> > force O_DIRECT.  But maybe this should pass inode->i_mapping->host
> > instead of inode.
> 
> Also not true in coda.
> 
> coda (for those who don't know) is a network filesystem which fetches
> whole files (and often multiple files) at a time (like the Andrew
> filesystem).  The files are stored in a local filesystem which acts as a
> cache.
> 
> So an inode in a 'coda' filesystem access page-cache pages from a file
> in e.g. an 'ext4' filesystem.  This is done via the ->i_mapping link.
> For (nearly?) all other filesystems, ->i_mapping is a link to ->i_data
> in the same inode.
> 
> > 
> > > In particular, fs/coda/file.c contains
> > > 
> > > 	if (coda_inode->i_mapping == &coda_inode->i_data)
> > > 		coda_inode->i_mapping = host_inode->i_mapping;
> > > 
> > > So a "coda_inode" shares the mapping with a "host_inode".
> > > 
> > > This is why an inode has both i_data and i_mapping.
> > > 
> > > So I'm not really sure this patch is safe.  It might break codafs.
> > > 
> > > But it is more likely that codafs isn't used, doesn't work, should be
> > > removed, and i_data should be renamed to i_mapping.
> > 
> > I think there's also something unusual going on with either ocfs2
> > or gfs2.  But yes, I don't understand the rules for when I need to
> > go from inode->i_mapping->host.
> > 
> 
> Simple.  Whenever you want to work with the page-cache pages, you cannot
> assume anything in the original inode is relevant except i_mapping (and
> maybe i_size I guess).
> 
> NeilBrown

-- 
Goldwyn

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-27  2:46       ` Goldwyn Rodrigues
@ 2021-07-27  3:49         ` NeilBrown
  0 siblings, 0 replies; 10+ messages in thread
From: NeilBrown @ 2021-07-27  3:49 UTC (permalink / raw)
  To: Goldwyn Rodrigues
  Cc: Matthew Wilcox, linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On Tue, 27 Jul 2021, Goldwyn Rodrigues wrote:
> On 12:25 27/07, NeilBrown wrote:
> > On Tue, 27 Jul 2021, Matthew Wilcox wrote:
> > > On Tue, Jul 27, 2021 at 08:06:21AM +1000, NeilBrown wrote:
> > > > You seem to be assuming that inode->i_mapping->host is always 'inode'.
> > > > That is not the case.
> > > 
> > > Weeeelllll ... technically, outside of the filesystems that are
> > > changed here, the only assumption in common code that is made is that
> > > inode_to_bdi(inode->i_mapping->host->i_mapping->host) ==
> > > inode_to_bdi(inode)
> > 
> > Individual filesystems doing their own thing is fine.  Passing just an
> > inode to inode_to_bdi is fine.
> > 
> > But the patch changes do_dentry_open()
> 
> But do_dentry_open() is setting up the file pointer (f) based on
> inode (and it's i_mapping). Can f->f_mapping change within
> do_dentry_open()?

do_dentry_open calls file_ra_state_init() to copy ra_pages from the bdi
for inode->i_mapping->host->i_mapping.
I do think there is some pointless indirection here, and it should be
sufficient to pass inode->i_mapping (aka f->f_mapping) to
file_ra_state_init(). (though in 2004, Andrew Morton thought otherwise)
But you have changed do_dentry_open() to not follow the ->i_mapping link
at all.
So in the coda case f->f_ra will be inititalied from the bdi for coda
instead of the bdi for the filesystem coda uses for local storage.

So this is a change in behaviour.  Maybe not a serious one, but one that
needs to be understood.

https://git.kernel.org/pub/scm/linux/kernel/git/history/history.git/commit/?id=1c211088833a27daa4512348bcae9890e8cf92d4

Hmm.  drivers/dax/device.c does some funky things with ->i_mapping too.
I wonder if that would be affected by this change....  probably not, it
looks like it is the same super_block and so the same ra info for both
mappings.

NeilBrown

> 
> > 
> > > 
> > > Looking at inode_to_bdi, that just means that they have the same i_sb.
> > > Which is ... not true for character raw devices?
> > >         if (++raw_devices[minor].inuse == 1)
> > >                 file_inode(filp)->i_mapping =
> > >                         bdev->bd_inode->i_mapping;
> > > but then, who's using readahead on a character raw device?  They
> > > force O_DIRECT.  But maybe this should pass inode->i_mapping->host
> > > instead of inode.
> > 
> > Also not true in coda.
> > 
> > coda (for those who don't know) is a network filesystem which fetches
> > whole files (and often multiple files) at a time (like the Andrew
> > filesystem).  The files are stored in a local filesystem which acts as a
> > cache.
> > 
> > So an inode in a 'coda' filesystem access page-cache pages from a file
> > in e.g. an 'ext4' filesystem.  This is done via the ->i_mapping link.
> > For (nearly?) all other filesystems, ->i_mapping is a link to ->i_data
> > in the same inode.
> > 
> > > 
> > > > In particular, fs/coda/file.c contains
> > > > 
> > > > 	if (coda_inode->i_mapping == &coda_inode->i_data)
> > > > 		coda_inode->i_mapping = host_inode->i_mapping;
> > > > 
> > > > So a "coda_inode" shares the mapping with a "host_inode".
> > > > 
> > > > This is why an inode has both i_data and i_mapping.
> > > > 
> > > > So I'm not really sure this patch is safe.  It might break codafs.
> > > > 
> > > > But it is more likely that codafs isn't used, doesn't work, should be
> > > > removed, and i_data should be renamed to i_mapping.
> > > 
> > > I think there's also something unusual going on with either ocfs2
> > > or gfs2.  But yes, I don't understand the rules for when I need to
> > > go from inode->i_mapping->host.
> > > 
> > 
> > Simple.  Whenever you want to work with the page-cache pages, you cannot
> > assume anything in the original inode is relevant except i_mapping (and
> > maybe i_size I guess).
> > 
> > NeilBrown
> 
> -- 
> Goldwyn
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-26 16:46 [PATCH] fs: reduce pointers while using file_ra_state_init() Goldwyn Rodrigues
  2021-07-26 17:19 ` Matthew Wilcox
  2021-07-26 22:06 ` NeilBrown
@ 2021-07-29  4:58 ` NeilBrown
  2021-07-29 15:19   ` Goldwyn Rodrigues
  2 siblings, 1 reply; 10+ messages in thread
From: NeilBrown @ 2021-07-29  4:58 UTC (permalink / raw)
  To: Goldwyn Rodrigues, Matthew Wilcox
  Cc: linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On Tue, 27 Jul 2021, Goldwyn Rodrigues wrote:
> Simplification.
> 
> file_ra_state_init() take struct address_space *, just to use inode
> pointer by dereferencing from mapping->host.
> 
> The callers also derive mapping either by file->f_mapping, or
> even file->f_mapping->host->i_mapping.
> 
> Change file_ra_state_init() to accept struct inode * to reduce pointer
> dereferencing, both in the callee and the caller.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
....

> diff --git a/mm/readahead.c b/mm/readahead.c
> index d589f147f4c2..3541941df5e7 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -31,9 +31,9 @@
>   * memset *ra to zero.
>   */
>  void
> -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
> +file_ra_state_init(struct file_ra_state *ra, struct inode *inode)
>  {
> -	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
> +	ra->ra_pages = inode_to_bdi(inode)->ra_pages;
>  	ra->prev_pos = -1;

I think this patch can be made OK by adding:

  if (unlikely(inode->i_mapping != &inode->i_data))
	inode = inode->i_mapping->host;

The "unlikely" is mostly for documentation.
Loading "inode->i_mapping" is nearly free as that cache line needs to be
loaded to get i_sb, which inode_to_bdi() needs.  Calculating &->i_data
is trivial.  So this adds minimal cost, and preserves correctness.

NeilBrown


>  }
>  EXPORT_SYMBOL_GPL(file_ra_state_init);
> -- 
> 2.32.0
> 
> 
> -- 
> Goldwyn
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-29  4:58 ` NeilBrown
@ 2021-07-29 15:19   ` Goldwyn Rodrigues
  0 siblings, 0 replies; 10+ messages in thread
From: Goldwyn Rodrigues @ 2021-07-29 15:19 UTC (permalink / raw)
  To: NeilBrown
  Cc: Matthew Wilcox, linux-fsdevel, linux-btrfs, linux-fscrypt,
	linux-nfs, codalist

On 14:58 29/07, NeilBrown wrote:
> On Tue, 27 Jul 2021, Goldwyn Rodrigues wrote:
> > Simplification.
> > 
> > file_ra_state_init() take struct address_space *, just to use inode
> > pointer by dereferencing from mapping->host.
> > 
> > The callers also derive mapping either by file->f_mapping, or
> > even file->f_mapping->host->i_mapping.
> > 
> > Change file_ra_state_init() to accept struct inode * to reduce pointer
> > dereferencing, both in the callee and the caller.
> > 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > ---
> ....
> 
> > diff --git a/mm/readahead.c b/mm/readahead.c
> > index d589f147f4c2..3541941df5e7 100644
> > --- a/mm/readahead.c
> > +++ b/mm/readahead.c
> > @@ -31,9 +31,9 @@
> >   * memset *ra to zero.
> >   */
> >  void
> > -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
> > +file_ra_state_init(struct file_ra_state *ra, struct inode *inode)
> >  {
> > -	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
> > +	ra->ra_pages = inode_to_bdi(inode)->ra_pages;
> >  	ra->prev_pos = -1;
> 
> I think this patch can be made OK by adding:
> 
>   if (unlikely(inode->i_mapping != &inode->i_data))
> 	inode = inode->i_mapping->host;
> 
> The "unlikely" is mostly for documentation.
> Loading "inode->i_mapping" is nearly free as that cache line needs to be
> loaded to get i_sb, which inode_to_bdi() needs.  Calculating &->i_data
> is trivial.  So this adds minimal cost, and preserves correctness.
> 

Thanks Neil. Coda seems to be the only filesystem to manipulate
inode->i_mapping to support the mmap() operation and eventually resets
it back on release().

Not sure if this hack should be put in just for coda, or just leave the
function prototype as it is to accept address_space *. I will send out
another patch to see what others feel about it.


-- 
Goldwyn

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] fs: reduce pointers while using file_ra_state_init()
  2021-07-26 17:19 ` Matthew Wilcox
@ 2021-07-29 16:08   ` Goldwyn Rodrigues
  0 siblings, 0 replies; 10+ messages in thread
From: Goldwyn Rodrigues @ 2021-07-29 16:08 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-fsdevel, linux-btrfs, linux-fscrypt, linux-nfs

On 18:19 26/07, Matthew Wilcox wrote:
> On Mon, Jul 26, 2021 at 11:46:47AM -0500, Goldwyn Rodrigues wrote:
> > Simplification.
> > 
> > file_ra_state_init() take struct address_space *, just to use inode
> > pointer by dereferencing from mapping->host.
> > 
> > The callers also derive mapping either by file->f_mapping, or
> > even file->f_mapping->host->i_mapping.
> > 
> > Change file_ra_state_init() to accept struct inode * to reduce pointer
> > dereferencing, both in the callee and the caller.
> > 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> 
> (some adjacent comments)
> 
> > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > index 4806295116d8..c43bf9915cda 100644
> > --- a/fs/btrfs/free-space-cache.c
> > +++ b/fs/btrfs/free-space-cache.c
> > @@ -351,7 +351,7 @@ static void readahead_cache(struct inode *inode)
> >  	if (!ra)
> >  		return;
> >  
> > -	file_ra_state_init(ra, inode->i_mapping);
> > +	file_ra_state_init(ra, inode);
> >  	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
> >  
> >  	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
> 
> Why does btrfs allocate a file_ra_state using kmalloc instead of
> on the stack?
> 
> > +++ b/include/linux/fs.h
> > @@ -3260,7 +3260,7 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
> >  
> >  
> >  extern void
> > -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
> > +file_ra_state_init(struct file_ra_state *ra, struct inode *inode);
> 
> This should move to pagemap.h (and lose the extern).
> I'd put it near the definition of VM_READAHEAD_PAGES.
> 
> > diff --git a/mm/readahead.c b/mm/readahead.c
> > index d589f147f4c2..3541941df5e7 100644
> > --- a/mm/readahead.c
> > +++ b/mm/readahead.c
> > @@ -31,9 +31,9 @@
> >   * memset *ra to zero.
> >   */
> >  void
> > -file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
> > +file_ra_state_init(struct file_ra_state *ra, struct inode *inode)
> >  {
> > -	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
> > +	ra->ra_pages = inode_to_bdi(inode)->ra_pages;
> >  	ra->prev_pos = -1;
> >  }
> >  EXPORT_SYMBOL_GPL(file_ra_state_init);
> 
> I'm not entirely sure why this function is out-of-line, tbh.
> Would it make more sense for it to be static inline in a header?

Which one? pagemap.h or fs.h does not know of inode_to_bdi(), should
linux/backing-dev.h be included?

-- 
Goldwyn

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-07-29 16:11 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-26 16:46 [PATCH] fs: reduce pointers while using file_ra_state_init() Goldwyn Rodrigues
2021-07-26 17:19 ` Matthew Wilcox
2021-07-29 16:08   ` Goldwyn Rodrigues
2021-07-26 22:06 ` NeilBrown
2021-07-27  2:05   ` Matthew Wilcox
2021-07-27  2:25     ` NeilBrown
2021-07-27  2:46       ` Goldwyn Rodrigues
2021-07-27  3:49         ` NeilBrown
2021-07-29  4:58 ` NeilBrown
2021-07-29 15:19   ` Goldwyn Rodrigues

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.