All of lore.kernel.org
 help / color / mirror / Atom feed
From: "NeilBrown" <neilb@suse.de>
To: "Jeff Layton" <jlayton@kernel.org>
Cc: tytso@mit.edu, adilger.kernel@dilger.ca, djwong@kernel.org,
	david@fromorbit.com, trondmy@hammerspace.com,
	viro@zeniv.linux.org.uk, zohar@linux.ibm.com, xiubli@redhat.com,
	chuck.lever@oracle.com, lczerner@redhat.com, jack@suse.cz,
	bfields@fieldses.org, brauner@kernel.org, fweimer@redhat.com,
	linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, ceph-devel@vger.kernel.org,
	linux-ext4@vger.kernel.org, linux-nfs@vger.kernel.org,
	linux-xfs@vger.kernel.org
Subject: Re: [PATCH v6 6/9] nfsd: use the getattr operation to fetch i_version
Date: Tue, 04 Oct 2022 10:39:09 +1100	[thread overview]
Message-ID: <166484034920.14457.15225090674729127890@noble.neil.brown.name> (raw)
In-Reply-To: <20220930111840.10695-7-jlayton@kernel.org>

On Fri, 30 Sep 2022, Jeff Layton wrote:
> Now that we can call into vfs_getattr to get the i_version field, use
> that facility to fetch it instead of doing it in nfsd4_change_attribute.
> 
> Neil also pointed out recently that IS_I_VERSION directory operations
> are always logged, and so we only need to mitigate the rollback problem
> on regular files. Also, we don't need to factor in the ctime when
> reexporting NFS or Ceph.
> 
> Set the STATX_VERSION (and BTIME) bits in the request when we're dealing
> with a v4 request. Then, instead of looking at IS_I_VERSION when
> generating the change attr, look at the result mask and only use it if
> STATX_VERSION is set. With this change, we can drop the fetch_iversion
> export operation as well.
> 
> Move nfsd4_change_attribute into nfsfh.c, and change it to only factor
> in the ctime if it's a regular file and the fs doesn't advertise
> STATX_ATTR_VERSION_MONOTONIC.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/nfs/export.c          |  7 -------
>  fs/nfsd/nfs4xdr.c        |  4 +++-
>  fs/nfsd/nfsfh.c          | 40 ++++++++++++++++++++++++++++++++++++++++
>  fs/nfsd/nfsfh.h          | 29 +----------------------------
>  fs/nfsd/vfs.h            |  7 ++++++-
>  include/linux/exportfs.h |  1 -
>  6 files changed, 50 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/nfs/export.c b/fs/nfs/export.c
> index 01596f2d0a1e..1a9d5aa51dfb 100644
> --- a/fs/nfs/export.c
> +++ b/fs/nfs/export.c
> @@ -145,17 +145,10 @@ nfs_get_parent(struct dentry *dentry)
>  	return parent;
>  }
>  
> -static u64 nfs_fetch_iversion(struct inode *inode)
> -{
> -	nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
> -	return inode_peek_iversion_raw(inode);
> -}
> -
>  const struct export_operations nfs_export_ops = {
>  	.encode_fh = nfs_encode_fh,
>  	.fh_to_dentry = nfs_fh_to_dentry,
>  	.get_parent = nfs_get_parent,
> -	.fetch_iversion = nfs_fetch_iversion,
>  	.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
>  		EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
>  		EXPORT_OP_NOATOMIC_ATTR,
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 1e9690a061ec..779c009314c6 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2869,7 +2869,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>  			goto out;
>  	}
>  
> -	err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
> +	err = vfs_getattr(&path, &stat,
> +			  STATX_BASIC_STATS | STATX_BTIME | STATX_VERSION,
> +			  AT_STATX_SYNC_AS_STAT);
>  	if (err)
>  		goto out_nfserr;
>  	if (!(stat.result_mask & STATX_BTIME))
> diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
> index a5b71526cee0..9168bc657378 100644
> --- a/fs/nfsd/nfsfh.c
> +++ b/fs/nfsd/nfsfh.c
> @@ -634,6 +634,10 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
>  		stat.mtime = inode->i_mtime;
>  		stat.ctime = inode->i_ctime;
>  		stat.size  = inode->i_size;
> +		if (v4 && IS_I_VERSION(inode)) {
> +			stat.version = inode_query_iversion(inode);
> +			stat.result_mask |= STATX_VERSION;
> +		}

This is increasingly ugly.  I wonder if it is justified at all...

>  	}
>  	if (v4)
>  		fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
> @@ -665,6 +669,8 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
>  	if (err) {
>  		fhp->fh_post_saved = false;
>  		fhp->fh_post_attr.ctime = inode->i_ctime;
> +		if (v4 && IS_I_VERSION(inode))
> +			fhp->fh_post_attr.version = inode_query_iversion(inode);

... ditto ...

>  	} else
>  		fhp->fh_post_saved = true;
>  	if (v4)
> @@ -754,3 +760,37 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
>  		return FSIDSOURCE_UUID;
>  	return FSIDSOURCE_DEV;
>  }
> +
> +/*
> + * We could use i_version alone as the change attribute.  However, i_version
> + * can go backwards on a regular file after an unclean shutdown.  On its own
> + * that doesn't necessarily cause a problem, but if i_version goes backwards
> + * and then is incremented again it could reuse a value that was previously
> + * used before boot, and a client who queried the two values might incorrectly
> + * assume nothing changed.
> + *
> + * By using both ctime and the i_version counter we guarantee that as long as
> + * time doesn't go backwards we never reuse an old value. If the filesystem
> + * advertises STATX_ATTR_VERSION_MONOTONIC, then this mitigation is not needed.
> + *
> + * We only need to do this for regular files as well. For directories, we
> + * assume that the new change attr is always logged to stable storage in some
> + * fashion before the results can be seen.
> + */
> +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode)
> +{
> +	u64 chattr;
> +
> +	if (stat->result_mask & STATX_VERSION) {
> +		chattr = stat->version;
> +
> +		if (S_ISREG(inode->i_mode) &&
> +		    !(stat->attributes & STATX_ATTR_VERSION_MONOTONIC)) {

I would really rather that the fs got to make this decision.
If it can guarantee that the i_version is monotonic even over a crash
(which is probably can for directory, and might need changes to do for
files) then it sets STATX_ATTR_VERSION_MONOTONIC and nfsd trusts it
completely.
If it cannot, then it doesn't set the flag.
i.e. the S_ISREG() test should be in the filesystem, not in nfsd.


> +			chattr += (u64)stat->ctime.tv_sec << 30;
> +			chattr += stat->ctime.tv_nsec;
> +		}
> +	} else {
> +		chattr = time_to_chattr(&stat->ctime);
> +	}
> +	return chattr;
> +}
> diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
> index c3ae6414fc5c..4c223a7a91d4 100644
> --- a/fs/nfsd/nfsfh.h
> +++ b/fs/nfsd/nfsfh.h
> @@ -291,34 +291,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
>  	fhp->fh_pre_saved = false;
>  }
>  
> -/*
> - * We could use i_version alone as the change attribute.  However,
> - * i_version can go backwards after a reboot.  On its own that doesn't
> - * necessarily cause a problem, but if i_version goes backwards and then
> - * is incremented again it could reuse a value that was previously used
> - * before boot, and a client who queried the two values might
> - * incorrectly assume nothing changed.
> - *
> - * By using both ctime and the i_version counter we guarantee that as
> - * long as time doesn't go backwards we never reuse an old value.
> - */
> -static inline u64 nfsd4_change_attribute(struct kstat *stat,
> -					 struct inode *inode)
> -{
> -	if (inode->i_sb->s_export_op->fetch_iversion)
> -		return inode->i_sb->s_export_op->fetch_iversion(inode);
> -	else if (IS_I_VERSION(inode)) {
> -		u64 chattr;
> -
> -		chattr =  stat->ctime.tv_sec;
> -		chattr <<= 30;
> -		chattr += stat->ctime.tv_nsec;
> -		chattr += inode_query_iversion(inode);
> -		return chattr;
> -	} else
> -		return time_to_chattr(&stat->ctime);
> -}
> -
> +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode);
>  extern void fh_fill_pre_attrs(struct svc_fh *fhp);
>  extern void fh_fill_post_attrs(struct svc_fh *fhp);
>  extern void fh_fill_both_attrs(struct svc_fh *fhp);
> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index c95cd414b4bb..a905f59481ee 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -168,9 +168,14 @@ static inline void fh_drop_write(struct svc_fh *fh)
>  
>  static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
>  {
> +	u32 request_mask = STATX_BASIC_STATS;
>  	struct path p = {.mnt = fh->fh_export->ex_path.mnt,
>  			 .dentry = fh->fh_dentry};
> -	return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
> +
> +	if (fh->fh_maxsize == NFS4_FHSIZE)
> +		request_mask |= (STATX_BTIME | STATX_VERSION);
> +
> +	return nfserrno(vfs_getattr(&p, stat, request_mask,
>  				    AT_STATX_SYNC_AS_STAT));
>  }
>  
> diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
> index fe848901fcc3..9f4d4bcbf251 100644
> --- a/include/linux/exportfs.h
> +++ b/include/linux/exportfs.h
> @@ -213,7 +213,6 @@ struct export_operations {
>  			  bool write, u32 *device_generation);
>  	int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
>  			     int nr_iomaps, struct iattr *iattr);
> -	u64 (*fetch_iversion)(struct inode *);
>  #define	EXPORT_OP_NOWCC			(0x1) /* don't collect v3 wcc data */
>  #define	EXPORT_OP_NOSUBTREECHK		(0x2) /* no subtree checking */
>  #define	EXPORT_OP_CLOSE_BEFORE_UNLINK	(0x4) /* close files before unlink */
> -- 
> 2.37.3
> 
> 

Definitely more to like than to dislike here, so

Reviewed-by: NeilBrown <neilb@suse.de>

Thanks,
NeilBrown

  parent reply	other threads:[~2022-10-03 23:39 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-30 11:18 [PATCH v6 0/9] vfs/nfsd: clean up handling of i_version counter Jeff Layton
2022-09-30 11:18 ` [PATCH v6 1/9] iversion: move inode_query_iversion to libfs.c Jeff Layton
2022-10-03 23:05   ` NeilBrown
2022-09-30 11:18 ` [PATCH v6 2/9] iversion: clarify when the i_version counter must be updated Jeff Layton
2022-10-03 23:10   ` NeilBrown
2022-10-04  9:53     ` Jeff Layton
2022-09-30 11:18 ` [PATCH v6 3/9] vfs: plumb i_version handling into struct kstat Jeff Layton
2022-10-03 23:14   ` NeilBrown
2022-09-30 11:18 ` [PATCH v6 4/9] nfs: report the inode version in getattr if requested Jeff Layton
2022-10-03 23:29   ` NeilBrown
2022-10-04  9:43     ` Jeff Layton
2022-10-04 22:27       ` NeilBrown
2022-09-30 11:18 ` [PATCH v6 5/9] ceph: " Jeff Layton
2022-09-30 11:18 ` [PATCH v6 6/9] nfsd: use the getattr operation to fetch i_version Jeff Layton
2022-09-30 14:34   ` Chuck Lever III
2022-09-30 22:32     ` Dave Chinner
2022-10-03 23:39   ` NeilBrown [this message]
2022-10-05 10:06     ` Jeff Layton
2022-10-05 13:33       ` Chuck Lever III
2022-10-05 13:34       ` Trond Myklebust
2022-10-05 13:57         ` Jeff Layton
2022-10-05 21:14       ` NeilBrown
2022-10-06 11:15         ` Jeff Layton
2022-10-06 21:17     ` Dave Chinner
2022-09-30 11:18 ` [PATCH v6 7/9] vfs: expose STATX_VERSION to userland Jeff Layton
2022-10-03 23:42   ` NeilBrown
2022-10-05 10:08     ` Jeff Layton
2022-09-30 11:18 ` [PATCH v6 8/9] vfs: update times after copying data in __generic_file_write_iter Jeff Layton
2022-10-01 20:39   ` kernel test robot
2022-10-01 21:00   ` kernel test robot
2022-10-02  7:08   ` Amir Goldstein
2022-10-03 13:01     ` Jeff Layton
2022-10-03 13:52       ` Amir Goldstein
2022-10-03 22:56         ` NeilBrown
2022-10-05 16:40           ` Jeff Layton
2022-10-05 21:40             ` NeilBrown
2022-09-30 11:18 ` [PATCH v6 9/9] ext4: update times after I/O in write codepaths Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=166484034920.14457.15225090674729127890@noble.neil.brown.name \
    --to=neilb@suse.de \
    --cc=adilger.kernel@dilger.ca \
    --cc=bfields@fieldses.org \
    --cc=brauner@kernel.org \
    --cc=ceph-devel@vger.kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=fweimer@redhat.com \
    --cc=jack@suse.cz \
    --cc=jlayton@kernel.org \
    --cc=lczerner@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=trondmy@hammerspace.com \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xiubli@redhat.com \
    --cc=zohar@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.