All of lore.kernel.org
 help / color / mirror / Atom feed
* nfsd: add support for the chage_attr_type attribute
@ 2014-11-08 12:11 Christoph Hellwig
  2014-11-08 12:11 ` [PATCH 1/2] nfsd: correctly define v4.2 support attributes Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-11-08 12:11 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, linux-nfs

The first one is a bug fix that probably should go into 3.18-rc and -stable.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] nfsd: correctly define v4.2 support attributes
  2014-11-08 12:11 nfsd: add support for the chage_attr_type attribute Christoph Hellwig
@ 2014-11-08 12:11 ` Christoph Hellwig
  2014-11-10 22:21   ` J. Bruce Fields
  2014-11-08 12:11 ` [PATCH 2/2] nfsd: implement chage_attr_type attribute Christoph Hellwig
  2014-11-10 21:42 ` nfsd: add support for the " J. Bruce Fields
  2 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2014-11-08 12:11 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, linux-nfs

Even when security labels are disabled we support at least the same
attributes as v4.1.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfsd.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 747f3b95..43b6a36 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -335,12 +335,15 @@ void		nfsd_lockd_shutdown(void);
 	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
-#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
-	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL)
+#define NFSD4_2_SECURITY_ATTRS		FATTR4_WORD2_SECURITY_LABEL
 #else
-#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0
+#define NFSD4_2_SECURITY_ATTRS		9
 #endif
 
+#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
+	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+	NFSD4_2_SECURITY_ATTRS)
+
 static inline u32 nfsd_suppattrs0(u32 minorversion)
 {
 	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-08 12:11 nfsd: add support for the chage_attr_type attribute Christoph Hellwig
  2014-11-08 12:11 ` [PATCH 1/2] nfsd: correctly define v4.2 support attributes Christoph Hellwig
@ 2014-11-08 12:11 ` Christoph Hellwig
  2014-11-10 17:54   ` J. Bruce Fields
  2014-11-10 21:42 ` nfsd: add support for the " J. Bruce Fields
  2 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2014-11-08 12:11 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, linux-nfs

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/nfs4xdr.c    | 16 ++++++++++++++++
 fs/nfsd/nfsd.h       |  2 +-
 include/linux/nfs4.h |  9 +++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index eeea7a9..3205b55 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1730,6 +1730,15 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
 	return p;
 }
 
+static __be32 *encode_change_attr_type(__be32 *p, struct inode *inode)
+{
+	if (IS_I_VERSION(inode))
+		*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_VERSION_COUNTER);
+	else
+		*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
+	return p;
+}
+
 static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
 {
 	*p++ = cpu_to_be32(c->atomic);
@@ -2535,6 +2544,13 @@ out_acl:
 		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
 	}
 
+	if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			goto out_resource;
+		p = encode_change_attr_type(p, dentry->d_inode);
+	}
+
 	attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
 	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
 	status = nfs_ok;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 43b6a36..59a734f 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -342,7 +342,7 @@ void		nfsd_lockd_shutdown(void);
 
 #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
 	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
-	NFSD4_2_SECURITY_ATTRS)
+	FATTR4_WORD2_CHANGE_ATTR_TYPE | NFSD4_2_SECURITY_ATTRS)
 
 static inline u32 nfsd_suppattrs0(u32 minorversion)
 {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 356acc2..85ccd06 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -413,6 +413,7 @@ enum lock_type4 {
 #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE   (1UL << 15)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 
 /* MDS threshold bitmap bits */
@@ -558,4 +559,12 @@ enum data_content4 {
 	NFS4_CONTENT_HOLE		= 1,
 };
 
+enum change_attr_type4 {
+	NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR	= 0,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER	= 1,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+	NFS4_CHANGE_TYPE_IS_TIME_METADATA	= 3,
+	NFS4_CHANGE_TYPE_IS_UNDEFINED		= 4
+};
+
 #endif
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-08 12:11 ` [PATCH 2/2] nfsd: implement chage_attr_type attribute Christoph Hellwig
@ 2014-11-10 17:54   ` J. Bruce Fields
  2014-11-11 10:27     ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: J. Bruce Fields @ 2014-11-10 17:54 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Trond Myklebust, linux-nfs

On Sat, Nov 08, 2014 at 01:11:04PM +0100, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/nfsd/nfs4xdr.c    | 16 ++++++++++++++++
>  fs/nfsd/nfsd.h       |  2 +-
>  include/linux/nfs4.h |  9 +++++++++
>  3 files changed, 26 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index eeea7a9..3205b55 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1730,6 +1730,15 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
>  	return p;
>  }
>  
> +static __be32 *encode_change_attr_type(__be32 *p, struct inode *inode)
> +{
> +	if (IS_I_VERSION(inode))
> +		*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_VERSION_COUNTER);

Shouldn't that be NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR?

The draft says that e.g. "If the client sees
NFS4_CHANGE_TYPE_IS_VERSION_COUNTER, it has the ability to predict what
the resulting change attribute value should be after a COMPOUND
containing a SETATTR, WRITE, or CREATE."

Admittedly, I'm not completely sure what that means.  (Is a SETATTR of
multiple attributes a single atomic change?  Can we predict the change
attribute on a newly created file, or only on the parent directory?)  I
also don't know where the filesystems do the i_version increment (can we
guarantee it happens once per nfs WRITE?).

--b.

> +	else
> +		*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
> +	return p;
> +}
> +
>  static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
>  {
>  	*p++ = cpu_to_be32(c->atomic);
> @@ -2535,6 +2544,13 @@ out_acl:
>  		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
>  	}
>  
> +	if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
> +		p = xdr_reserve_space(xdr, 4);
> +		if (!p)
> +			goto out_resource;
> +		p = encode_change_attr_type(p, dentry->d_inode);
> +	}
> +
>  	attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
>  	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
>  	status = nfs_ok;
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 43b6a36..59a734f 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -342,7 +342,7 @@ void		nfsd_lockd_shutdown(void);
>  
>  #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
>  	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
> -	NFSD4_2_SECURITY_ATTRS)
> +	FATTR4_WORD2_CHANGE_ATTR_TYPE | NFSD4_2_SECURITY_ATTRS)
>  
>  static inline u32 nfsd_suppattrs0(u32 minorversion)
>  {
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 356acc2..85ccd06 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -413,6 +413,7 @@ enum lock_type4 {
>  #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
>  #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
>  #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
> +#define FATTR4_WORD2_CHANGE_ATTR_TYPE   (1UL << 15)
>  #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
>  
>  /* MDS threshold bitmap bits */
> @@ -558,4 +559,12 @@ enum data_content4 {
>  	NFS4_CONTENT_HOLE		= 1,
>  };
>  
> +enum change_attr_type4 {
> +	NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR	= 0,
> +	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER	= 1,
> +	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
> +	NFS4_CHANGE_TYPE_IS_TIME_METADATA	= 3,
> +	NFS4_CHANGE_TYPE_IS_UNDEFINED		= 4
> +};
> +
>  #endif
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: nfsd: add support for the chage_attr_type attribute
  2014-11-08 12:11 nfsd: add support for the chage_attr_type attribute Christoph Hellwig
  2014-11-08 12:11 ` [PATCH 1/2] nfsd: correctly define v4.2 support attributes Christoph Hellwig
  2014-11-08 12:11 ` [PATCH 2/2] nfsd: implement chage_attr_type attribute Christoph Hellwig
@ 2014-11-10 21:42 ` J. Bruce Fields
  2 siblings, 0 replies; 24+ messages in thread
From: J. Bruce Fields @ 2014-11-10 21:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Trond Myklebust, linux-nfs

On Sat, Nov 08, 2014 at 01:11:02PM +0100, Christoph Hellwig wrote:
> The first one is a bug fix that probably should go into 3.18-rc and -stable.
> 

Agreed, thanks.--b.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] nfsd: correctly define v4.2 support attributes
  2014-11-08 12:11 ` [PATCH 1/2] nfsd: correctly define v4.2 support attributes Christoph Hellwig
@ 2014-11-10 22:21   ` J. Bruce Fields
  2014-11-11 10:22     ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: J. Bruce Fields @ 2014-11-10 22:21 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Trond Myklebust, linux-nfs

On Sat, Nov 08, 2014 at 01:11:03PM +0100, Christoph Hellwig wrote:
> Even when security labels are disabled we support at least the same
> attributes as v4.1.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/nfsd/nfsd.h | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 747f3b95..43b6a36 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -335,12 +335,15 @@ void		nfsd_lockd_shutdown(void);
>  	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
>  
>  #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
> -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
> -	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL)
> +#define NFSD4_2_SECURITY_ATTRS		FATTR4_WORD2_SECURITY_LABEL
>  #else
> -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0
> +#define NFSD4_2_SECURITY_ATTRS		9

I'm assuming that 9 was meant to be a 0.

--b.

>  #endif
>  
> +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
> +	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
> +	NFSD4_2_SECURITY_ATTRS)
> +
>  static inline u32 nfsd_suppattrs0(u32 minorversion)
>  {
>  	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] nfsd: correctly define v4.2 support attributes
  2014-11-10 22:21   ` J. Bruce Fields
@ 2014-11-11 10:22     ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-11-11 10:22 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, linux-nfs

On Mon, Nov 10, 2014 at 05:21:10PM -0500, J. Bruce Fields wrote:
> I'm assuming that 9 was meant to be a 0.

Oops, of course.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-10 17:54   ` J. Bruce Fields
@ 2014-11-11 10:27     ` Christoph Hellwig
  2014-11-11 12:36       ` Trond Myklebust
  2014-11-11 21:42       ` J. Bruce Fields
  0 siblings, 2 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-11-11 10:27 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Christoph Hellwig, Trond Myklebust, linux-nfs

On Mon, Nov 10, 2014 at 12:54:24PM -0500, J. Bruce Fields wrote:
> Shouldn't that be NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR?
> 
> The draft says that e.g. "If the client sees
> NFS4_CHANGE_TYPE_IS_VERSION_COUNTER, it has the ability to predict what
> the resulting change attribute value should be after a COMPOUND
> containing a SETATTR, WRITE, or CREATE."
> 
> Admittedly, I'm not completely sure what that means.  (Is a SETATTR of
> multiple attributes a single atomic change?  Can we predict the change
> attribute on a newly created file, or only on the parent directory?)  I
> also don't know where the filesystems do the i_version increment (can we
> guarantee it happens once per nfs WRITE?).

Actually the server may increment it many times for a single WRITE,
for XFS it is incremented for each dirty transaction, which could
happen many times during a single write:

 (1) c/mtime update
 (2) suid/sgid bit removal
 (3) block allocation (could be multiple transactions)


So I guess we really should move to NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR
instead.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-11 10:27     ` Christoph Hellwig
@ 2014-11-11 12:36       ` Trond Myklebust
  2014-11-11 16:27         ` Christoph Hellwig
  2014-11-11 21:42       ` J. Bruce Fields
  1 sibling, 1 reply; 24+ messages in thread
From: Trond Myklebust @ 2014-11-11 12:36 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: J. Bruce Fields, Linux NFS Mailing List

On Tue, Nov 11, 2014 at 5:27 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Mon, Nov 10, 2014 at 12:54:24PM -0500, J. Bruce Fields wrote:
>> Shouldn't that be NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR?
>>
>> The draft says that e.g. "If the client sees
>> NFS4_CHANGE_TYPE_IS_VERSION_COUNTER, it has the ability to predict what
>> the resulting change attribute value should be after a COMPOUND
>> containing a SETATTR, WRITE, or CREATE."
>>
>> Admittedly, I'm not completely sure what that means.  (Is a SETATTR of
>> multiple attributes a single atomic change?  Can we predict the change
>> attribute on a newly created file, or only on the parent directory?)  I
>> also don't know where the filesystems do the i_version increment (can we
>> guarantee it happens once per nfs WRITE?).
>
> Actually the server may increment it many times for a single WRITE,
> for XFS it is incremented for each dirty transaction, which could
> happen many times during a single write:
>
>  (1) c/mtime update
>  (2) suid/sgid bit removal
>  (3) block allocation (could be multiple transactions)
>
>
> So I guess we really should move to NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR
> instead.

Agreed.

Just out of curiosity, though, why does XFS update i_version on every
block allocation? For NFSv4 compatibility, it really should suffice
for it to just do the update once in the above case. Did Lustre have
more stringent requirements?

-- 
Trond Myklebust

Linux NFS client maintainer, PrimaryData

trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-11 12:36       ` Trond Myklebust
@ 2014-11-11 16:27         ` Christoph Hellwig
  2014-11-11 22:27           ` Dave Chinner
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2014-11-11 16:27 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: J. Bruce Fields, Linux NFS Mailing List, Dave Chinner

On Tue, Nov 11, 2014 at 07:36:48AM -0500, Trond Myklebust wrote:
> Just out of curiosity, though, why does XFS update i_version on every
> block allocation? For NFSv4 compatibility, it really should suffice
> for it to just do the update once in the above case. Did Lustre have
> more stringent requirements?

It's literatlly updated everytime inode metadata changes.  Dave implemented
this, so I don't know the real rationale behind it.  I don't think anyone
sane cares about Lustre ever, certainly not for XFS.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-11 10:27     ` Christoph Hellwig
  2014-11-11 12:36       ` Trond Myklebust
@ 2014-11-11 21:42       ` J. Bruce Fields
  2014-12-01 19:50         ` J. Bruce Fields
  1 sibling, 1 reply; 24+ messages in thread
From: J. Bruce Fields @ 2014-11-11 21:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Trond Myklebust, linux-nfs

On Tue, Nov 11, 2014 at 11:27:35AM +0100, Christoph Hellwig wrote:
> On Mon, Nov 10, 2014 at 12:54:24PM -0500, J. Bruce Fields wrote:
> > Shouldn't that be NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR?
> > 
> > The draft says that e.g. "If the client sees
> > NFS4_CHANGE_TYPE_IS_VERSION_COUNTER, it has the ability to predict what
> > the resulting change attribute value should be after a COMPOUND
> > containing a SETATTR, WRITE, or CREATE."
> > 
> > Admittedly, I'm not completely sure what that means.  (Is a SETATTR of
> > multiple attributes a single atomic change?  Can we predict the change
> > attribute on a newly created file, or only on the parent directory?)  I
> > also don't know where the filesystems do the i_version increment (can we
> > guarantee it happens once per nfs WRITE?).
> 
> Actually the server may increment it many times for a single WRITE,
> for XFS it is incremented for each dirty transaction, which could
> happen many times during a single write:
> 
>  (1) c/mtime update
>  (2) suid/sgid bit removal
>  (3) block allocation (could be multiple transactions)
> 
> 
> So I guess we really should move to NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR
> instead.

I'm applying your patch for 3.19 with that one-line change.

--b.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-11 16:27         ` Christoph Hellwig
@ 2014-11-11 22:27           ` Dave Chinner
  2014-11-12 10:24             ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Chinner @ 2014-11-11 22:27 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Trond Myklebust, J. Bruce Fields, Linux NFS Mailing List

On Tue, Nov 11, 2014 at 05:27:04PM +0100, Christoph Hellwig wrote:
> On Tue, Nov 11, 2014 at 07:36:48AM -0500, Trond Myklebust wrote:
> > Just out of curiosity, though, why does XFS update i_version on every
> > block allocation? For NFSv4 compatibility, it really should suffice
> > for it to just do the update once in the above case. Did Lustre have
> > more stringent requirements?
> 
> It's literatlly updated everytime inode metadata changes.  Dave implemented
> this, so I don't know the real rationale behind it.  I don't think anyone
> sane cares about Lustre ever, certainly not for XFS.

/me goes for the thread to understand the context of the question

To clarify what Christoph wrote, XFS updates i_version is updated
once per transaction that modifies the inode. So if a VFS level
operation results in multiple transactions then each transaction
will but the version.

It was implemented that way because nobody could tell me what the
actual granularity requirement for change detection was.  Hence what
I implemented was "be able to detect any persistent change that is
made" to cover all bases.

On Tue, Nov 11, 2014 at 05:28:49PM +0100, Christoph Hellwig wrote:
> On Tue, Nov 11, 2014 at 05:27:04PM +0100, Christoph Hellwig wrote:
> > On Tue, Nov 11, 2014 at 07:36:48AM -0500, Trond Myklebust wrote:
> > > Just out of curiosity, though, why does XFS update i_version on every
> > > block allocation? For NFSv4 compatibility, it really should suffice
> > > for it to just do the update once in the above case. Did Lustre have
> > > more stringent requirements?
> > 
> > It's literatlly updated everytime inode metadata changes.  Dave implemented
> > this, so I don't know the real rationale behind it.  I don't think anyone
> > sane cares about Lustre ever, certainly not for XFS.
> 
> Btw, I think we have an even more important issue with di_changecount:
> as far as I tell we never initialize i_version to di_changecount when
> reading the inode from disk, so inodes might move backwards in their
> version.  I'll take care of this once we have settled the nfs semantics.

Yup, that's an oversight. The problem of adding functionality that
people want but aren't using and don't know exactly how they are
going to use it is that there's nothing to test that it's really
doing the right thing...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-11 22:27           ` Dave Chinner
@ 2014-11-12 10:24             ` Christoph Hellwig
  2014-11-12 14:26               ` Trond Myklebust
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2014-11-12 10:24 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Trond Myklebust, J. Bruce Fields, Linux NFS Mailing List

On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
> To clarify what Christoph wrote, XFS updates i_version is updated
> once per transaction that modifies the inode. So if a VFS level
> operation results in multiple transactions then each transaction
> will but the version.
> 
> It was implemented that way because nobody could tell me what the
> actual granularity requirement for change detection was.  Hence what
> I implemented was "be able to detect any persistent change that is
> made" to cover all bases.

Honestly the XFS implementation seems most sensible, and easiest to
verify for me.  I don't really understand the rationale behind the
fairly convoluted NFS4_CHANGE_TYPE_IS_VERSION_COUNTER semantics, and
I doubt you could actually implemet them on any Unix-like semantics.

Trond, given that the language in the standard is from you:

 1) how do you expect to use NFS4_CHANGE_TYPE_IS_VERSION_COUNTER
    semantics in the client
 2) what server do you have in mind that could actually implement them?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-12 10:24             ` Christoph Hellwig
@ 2014-11-12 14:26               ` Trond Myklebust
  2014-11-13  0:28                 ` Dave Chinner
  0 siblings, 1 reply; 24+ messages in thread
From: Trond Myklebust @ 2014-11-12 14:26 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Dave Chinner, J. Bruce Fields, Linux NFS Mailing List

On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
>> To clarify what Christoph wrote, XFS updates i_version is updated
>> once per transaction that modifies the inode. So if a VFS level
>> operation results in multiple transactions then each transaction
>> will but the version.
>>
>> It was implemented that way because nobody could tell me what the
>> actual granularity requirement for change detection was.  Hence what
>> I implemented was "be able to detect any persistent change that is
>> made" to cover all bases.
>
> Honestly the XFS implementation seems most sensible, and easiest to
> verify for me.  I don't really understand the rationale behind the
> fairly convoluted NFS4_CHANGE_TYPE_IS_VERSION_COUNTER semantics, and
> I doubt you could actually implemet them on any Unix-like semantics.
>
> Trond, given that the language in the standard is from you:
>
>  1) how do you expect to use NFS4_CHANGE_TYPE_IS_VERSION_COUNTER
>     semantics in the client

Basically, I'd like to use it the same way that AFS does. I want to be
able to issue an RPC call which does the equivalent of a single system
call (e.g. mkdir(), write(), link(), unlink(), etc) and be able to
predict what the effect should be on the change attribute (1 increment
on the parent directory for a successful mkdir(), 1 increment on the
file for a successful write(), ...) so that I can detect if someone
else has been modifying the file/directory/symlink while I wasn't
looking and hence know when I need to invalidate my cached
metadata+data for that object.

>  2) what server do you have in mind that could actually implement them?

As I said, AFS has this kind of semantics, and the AFS client on Linux
uses them to do this kind of 3rd party change detection.

-- 
Trond Myklebust

Linux NFS client maintainer, PrimaryData

trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-12 14:26               ` Trond Myklebust
@ 2014-11-13  0:28                 ` Dave Chinner
  2014-11-13 13:02                   ` Trond Myklebust
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Chinner @ 2014-11-13  0:28 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
> >> To clarify what Christoph wrote, XFS updates i_version is updated
> >> once per transaction that modifies the inode. So if a VFS level
> >> operation results in multiple transactions then each transaction
> >> will but the version.
> >>
> >> It was implemented that way because nobody could tell me what the
> >> actual granularity requirement for change detection was.  Hence what
> >> I implemented was "be able to detect any persistent change that is
> >> made" to cover all bases.
> >
> > Honestly the XFS implementation seems most sensible, and easiest to
> > verify for me.  I don't really understand the rationale behind the
> > fairly convoluted NFS4_CHANGE_TYPE_IS_VERSION_COUNTER semantics, and
> > I doubt you could actually implemet them on any Unix-like semantics.
> >
> > Trond, given that the language in the standard is from you:
> >
> >  1) how do you expect to use NFS4_CHANGE_TYPE_IS_VERSION_COUNTER
> >     semantics in the client
> 
> Basically, I'd like to use it the same way that AFS does. I want to be
> able to issue an RPC call which does the equivalent of a single system
> call (e.g. mkdir(), write(), link(), unlink(), etc) and be able to
> predict what the effect should be on the change attribute (1 increment
> on the parent directory for a successful mkdir(), 1 increment on the
> file for a successful write(), ...)

That's not the way the change version counter is implemented in the
VFS or any filesystem. It's a low level change primitive, not
something that is only updated on a syscall granularity.

I just can't see how a change counter at the syscall level can be
made to work reliably. NFS clients are now being told about server
block maps, so any extent map modification done by the underlying
filesystem needs to bump the change count so if the client is
caching the block map it can be invalidated. And with functionality
like delayed allocation modifications the client needs to know aout
can happen at any time and so change count modification can not be
limited only to syscall activity.

> so that I can detect if someone
> else has been modifying the file/directory/symlink while I wasn't
> looking and hence know when I need to invalidate my cached
> metadata+data for that object.

The only way to use the change count sanely from the client is as a
"check-and-execute" cookie on the server. If the change count sent
by the client is unchanged at the server then the server can execute
the operation. It can then return the new cookie to the client for
the next operation.  But we can't even do that sanely on Linux
because the check-and-execute operation needs to be atomic and hence
requires the filesystem to do it deep inside their transaction
subsystems once they've taken the locks it needs to ensure the
change count is stable.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-13  0:28                 ` Dave Chinner
@ 2014-11-13 13:02                   ` Trond Myklebust
  2014-11-13 21:47                     ` Dave Chinner
  2014-11-13 23:54                     ` Dave Chinner
  0 siblings, 2 replies; 24+ messages in thread
From: Trond Myklebust @ 2014-11-13 13:02 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Wed, Nov 12, 2014 at 7:28 PM, Dave Chinner <david@fromorbit.com> wrote:
> On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
>> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
>> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
>> >> To clarify what Christoph wrote, XFS updates i_version is updated
>> >> once per transaction that modifies the inode. So if a VFS level
>> >> operation results in multiple transactions then each transaction
>> >> will but the version.
>> >>
>> >> It was implemented that way because nobody could tell me what the
>> >> actual granularity requirement for change detection was.  Hence what
>> >> I implemented was "be able to detect any persistent change that is
>> >> made" to cover all bases.
>> >
>> > Honestly the XFS implementation seems most sensible, and easiest to
>> > verify for me.  I don't really understand the rationale behind the
>> > fairly convoluted NFS4_CHANGE_TYPE_IS_VERSION_COUNTER semantics, and
>> > I doubt you could actually implemet them on any Unix-like semantics.
>> >
>> > Trond, given that the language in the standard is from you:
>> >
>> >  1) how do you expect to use NFS4_CHANGE_TYPE_IS_VERSION_COUNTER
>> >     semantics in the client
>>
>> Basically, I'd like to use it the same way that AFS does. I want to be
>> able to issue an RPC call which does the equivalent of a single system
>> call (e.g. mkdir(), write(), link(), unlink(), etc) and be able to
>> predict what the effect should be on the change attribute (1 increment
>> on the parent directory for a successful mkdir(), 1 increment on the
>> file for a successful write(), ...)
>
> That's not the way the change version counter is implemented in the
> VFS or any filesystem. It's a low level change primitive, not
> something that is only updated on a syscall granularity.
>
> I just can't see how a change counter at the syscall level can be
> made to work reliably. NFS clients are now being told about server
> block maps, so any extent map modification done by the underlying
> filesystem needs to bump the change count so if the client is
> caching the block map it can be invalidated. And with functionality
> like delayed allocation modifications the client needs to know aout
> can happen at any time and so change count modification can not be
> limited only to syscall activity.

I didn't say it needs to be implemented in the VFS. Just that it needs
to be implemented in a way that makes sense if you are doing the
equivalent of a system call. Delayed allocations are a filesystem
implementation detail that do not change the application visible data
or metadata contents of the file; there should be no reason to have
them reflected in something like the change attribute.

As for pNFS blocks, I agree that the spec there is a little iffy, but
the intention was, I believe, that the whole LAYOUTGET->LAYOUTCOMMIT
should be considered to be a single filesystem transaction. However
the iffiness there is the main reason why I made a distinction between
pnfs vs. non-pnfs when describing the change attribute.

>> so that I can detect if someone
>> else has been modifying the file/directory/symlink while I wasn't
>> looking and hence know when I need to invalidate my cached
>> metadata+data for that object.
>
> The only way to use the change count sanely from the client is as a
> "check-and-execute" cookie on the server. If the change count sent
> by the client is unchanged at the server then the server can execute
> the operation. It can then return the new cookie to the client for
> the next operation.  But we can't even do that sanely on Linux
> because the check-and-execute operation needs to be atomic and hence
> requires the filesystem to do it deep inside their transaction
> subsystems once they've taken the locks it needs to ensure the
> change count is stable.

Applications are required to interact with the filesystem through a
well-defined API. Application visible data and metadata changes can be
(and are mostly) well defined w.r.t. that API. Where be the dragons?

-- 
Trond Myklebust

Linux NFS client maintainer, PrimaryData

trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-13 13:02                   ` Trond Myklebust
@ 2014-11-13 21:47                     ` Dave Chinner
  2014-11-13 23:54                     ` Dave Chinner
  1 sibling, 0 replies; 24+ messages in thread
From: Dave Chinner @ 2014-11-13 21:47 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Thu, Nov 13, 2014 at 08:02:43AM -0500, Trond Myklebust wrote:
> On Wed, Nov 12, 2014 at 7:28 PM, Dave Chinner <david@fromorbit.com> wrote:
> > On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
> >> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
> >> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
> >> >> To clarify what Christoph wrote, XFS updates i_version is updated
> >> >> once per transaction that modifies the inode. So if a VFS level
> >> >> operation results in multiple transactions then each transaction
> >> >> will but the version.
> >> >>
> >> >> It was implemented that way because nobody could tell me what the
> >> >> actual granularity requirement for change detection was.  Hence what
> >> >> I implemented was "be able to detect any persistent change that is
> >> >> made" to cover all bases.
> >> >
> >> > Honestly the XFS implementation seems most sensible, and easiest to
> >> > verify for me.  I don't really understand the rationale behind the
> >> > fairly convoluted NFS4_CHANGE_TYPE_IS_VERSION_COUNTER semantics, and
> >> > I doubt you could actually implemet them on any Unix-like semantics.
> >> >
> >> > Trond, given that the language in the standard is from you:
> >> >
> >> >  1) how do you expect to use NFS4_CHANGE_TYPE_IS_VERSION_COUNTER
> >> >     semantics in the client
> >>
> >> Basically, I'd like to use it the same way that AFS does. I want to be
> >> able to issue an RPC call which does the equivalent of a single system
> >> call (e.g. mkdir(), write(), link(), unlink(), etc) and be able to
> >> predict what the effect should be on the change attribute (1 increment
> >> on the parent directory for a successful mkdir(), 1 increment on the
> >> file for a successful write(), ...)
> >
> > That's not the way the change version counter is implemented in the
> > VFS or any filesystem. It's a low level change primitive, not
> > something that is only updated on a syscall granularity.
> >
> > I just can't see how a change counter at the syscall level can be
> > made to work reliably. NFS clients are now being told about server
> > block maps, so any extent map modification done by the underlying
> > filesystem needs to bump the change count so if the client is
> > caching the block map it can be invalidated. And with functionality
> > like delayed allocation modifications the client needs to know aout
> > can happen at any time and so change count modification can not be
> > limited only to syscall activity.
> 
> I didn't say it needs to be implemented in the VFS. Just that it needs
> to be implemented in a way that makes sense if you are doing the
> equivalent of a system call.

Your requirements indicate that the functionality can only be
implemented in the VFS - filesystems themselves have no idea what
system call operations are, and often they get multiple entries from
the VFS for one system call.  i.e. "one increment per syscall" is
not a problem individual filesystems can solve.

> Delayed allocations are a filesystem
> implementation detail that do not change the application visible data
> or metadata contents of the file; there should be no reason to have
> them reflected in something like the change attribute.

Delayed allocation changes inode metadata in user visible ways
because extent maps are user visible metadata.....

> As for pNFS blocks, I agree that the spec there is a little iffy, but
> the intention was, I believe, that the whole LAYOUTGET->LAYOUTCOMMIT
> should be considered to be a single filesystem transaction. However
> the iffiness there is the main reason why I made a distinction between
> pnfs vs. non-pnfs when describing the change attribute.

Forget pNFS, all the new stuff for exposing sparse files to the
client (e.g. FIEMAP, SEEK_DATA/HOLE) as well as preallocation mean
that filesystem allocation of any kind causes application visible
metadata changes on a file.

> >> so that I can detect if someone
> >> else has been modifying the file/directory/symlink while I wasn't
> >> looking and hence know when I need to invalidate my cached
> >> metadata+data for that object.
> >
> > The only way to use the change count sanely from the client is as a
> > "check-and-execute" cookie on the server. If the change count sent
> > by the client is unchanged at the server then the server can execute
> > the operation. It can then return the new cookie to the client for
> > the next operation.  But we can't even do that sanely on Linux
> > because the check-and-execute operation needs to be atomic and hence
> > requires the filesystem to do it deep inside their transaction
> > subsystems once they've taken the locks it needs to ensure the
> > change count is stable.
> 
> Applications are required to interact with the filesystem through a
> well-defined API. Application visible data and metadata changes can be
> (and are mostly) well defined w.r.t. that API. Where be the dragons?

That API isn't as well defined as you think. Every filesystem has
it's own ioctls that allow all sorts of interesting things to be
done. What we define as "application visible" appears to be
different because our problem scope is different. That's where
the dragons lie, and why you're going to need explicit NFS behaviour
to be implemented through the VFS if you want a specific set of
well defined behaviours. Otherwise you are going to get whatever
each individual filesystem considers a change in that counter.

And given that, clients simply can't make assumptions about how
server side counters change w.r.t. modifications being made because
servers (and potentially even exports on a server) are going to
differ in behaviour. The only sane way to detect a change has
occurred is for the change counter to be returned as a post-op
attribute on a file operation and for that value to be used as a
pre-condition for the next operation to be performed that is sent to
the server....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-13 13:02                   ` Trond Myklebust
  2014-11-13 21:47                     ` Dave Chinner
@ 2014-11-13 23:54                     ` Dave Chinner
  2014-11-14  0:43                       ` Trond Myklebust
  1 sibling, 1 reply; 24+ messages in thread
From: Dave Chinner @ 2014-11-13 23:54 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Thu, Nov 13, 2014 at 08:02:43AM -0500, Trond Myklebust wrote:
> On Wed, Nov 12, 2014 at 7:28 PM, Dave Chinner <david@fromorbit.com> wrote:
> > On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
> >> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
> >> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
> >> >> To clarify what Christoph wrote, XFS updates i_version is updated
> >> >> once per transaction that modifies the inode. So if a VFS level
> >> >> operation results in multiple transactions then each transaction
> >> >> will but the version.
> >> >>
> >> >> It was implemented that way because nobody could tell me what the
> >> >> actual granularity requirement for change detection was.  Hence what
> >> >> I implemented was "be able to detect any persistent change that is
> >> >> made" to cover all bases.

FWIW, ext4 takes the same approach.  See Ted's post today:

http://www.spinics.net/lists/linux-ext4/msg46194.html

"The inode_inc_iversion() in mark4_ext4_iloc_dirty() is probably not
necessary, since we already should be incrementing i_version whenever
ctime and mtime gets updated.  The inode_inc_iversion() there is more
of a "belt and suspenders" safety thing, on the theory that the extra
bump in i_version won't hurt anything."

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-13 23:54                     ` Dave Chinner
@ 2014-11-14  0:43                       ` Trond Myklebust
  2014-11-14  4:35                         ` Dave Chinner
  0 siblings, 1 reply; 24+ messages in thread
From: Trond Myklebust @ 2014-11-14  0:43 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Thu, Nov 13, 2014 at 6:54 PM, Dave Chinner <david@fromorbit.com> wrote:
> On Thu, Nov 13, 2014 at 08:02:43AM -0500, Trond Myklebust wrote:
>> On Wed, Nov 12, 2014 at 7:28 PM, Dave Chinner <david@fromorbit.com> wrote:
>> > On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
>> >> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
>> >> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
>> >> >> To clarify what Christoph wrote, XFS updates i_version is updated
>> >> >> once per transaction that modifies the inode. So if a VFS level
>> >> >> operation results in multiple transactions then each transaction
>> >> >> will but the version.
>> >> >>
>> >> >> It was implemented that way because nobody could tell me what the
>> >> >> actual granularity requirement for change detection was.  Hence what
>> >> >> I implemented was "be able to detect any persistent change that is
>> >> >> made" to cover all bases.
>
> FWIW, ext4 takes the same approach.  See Ted's post today:
>
> http://www.spinics.net/lists/linux-ext4/msg46194.html
>
> "The inode_inc_iversion() in mark4_ext4_iloc_dirty() is probably not
> necessary, since we already should be incrementing i_version whenever
> ctime and mtime gets updated.  The inode_inc_iversion() there is more
> of a "belt and suspenders" safety thing, on the theory that the extra
> bump in i_version won't hurt anything."
>

It will hurt if it causes all the NFS clients to blow their caches
unnecessarily.

Who asked for this?

-- 
Trond Myklebust

Linux NFS client maintainer, PrimaryData

trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-14  0:43                       ` Trond Myklebust
@ 2014-11-14  4:35                         ` Dave Chinner
  2014-11-14 14:22                           ` Trond Myklebust
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Chinner @ 2014-11-14  4:35 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Thu, Nov 13, 2014 at 07:43:33PM -0500, Trond Myklebust wrote:
> On Thu, Nov 13, 2014 at 6:54 PM, Dave Chinner <david@fromorbit.com> wrote:
> > On Thu, Nov 13, 2014 at 08:02:43AM -0500, Trond Myklebust wrote:
> >> On Wed, Nov 12, 2014 at 7:28 PM, Dave Chinner <david@fromorbit.com> wrote:
> >> > On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
> >> >> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
> >> >> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
> >> >> >> To clarify what Christoph wrote, XFS updates i_version is updated
> >> >> >> once per transaction that modifies the inode. So if a VFS level
> >> >> >> operation results in multiple transactions then each transaction
> >> >> >> will but the version.
> >> >> >>
> >> >> >> It was implemented that way because nobody could tell me what the
> >> >> >> actual granularity requirement for change detection was.  Hence what
> >> >> >> I implemented was "be able to detect any persistent change that is
> >> >> >> made" to cover all bases.
> >
> > FWIW, ext4 takes the same approach.  See Ted's post today:
> >
> > http://www.spinics.net/lists/linux-ext4/msg46194.html
> >
> > "The inode_inc_iversion() in mark4_ext4_iloc_dirty() is probably not
> > necessary, since we already should be incrementing i_version whenever
> > ctime and mtime gets updated.  The inode_inc_iversion() there is more
> > of a "belt and suspenders" safety thing, on the theory that the extra
> > bump in i_version won't hurt anything."
> >
> 
> It will hurt if it causes all the NFS clients to blow their caches
> unnecessarily.

Not my problem. We've just implemented what we were asked to
implement.

> Who asked for this?

The only discussion where actual specifications were enumerated was
during a thread about using i_version in the integrity measurement
code (IMA subsystem). The NFSv4 requirements for the change counter
were expressed here:

https://lkml.org/lkml/2012/1/5/408

Don't blame us for implementing the vague "changes every time"
requirements in a way that results in no chance of a persistent
change to either data or metadata being missed by the filesystem.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-14  4:35                         ` Dave Chinner
@ 2014-11-14 14:22                           ` Trond Myklebust
  2014-11-15  1:24                             ` Dave Chinner
  0 siblings, 1 reply; 24+ messages in thread
From: Trond Myklebust @ 2014-11-14 14:22 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Thu, Nov 13, 2014 at 11:35 PM, Dave Chinner <david@fromorbit.com> wrote:
> On Thu, Nov 13, 2014 at 07:43:33PM -0500, Trond Myklebust wrote:
>> On Thu, Nov 13, 2014 at 6:54 PM, Dave Chinner <david@fromorbit.com> wrote:
>> > On Thu, Nov 13, 2014 at 08:02:43AM -0500, Trond Myklebust wrote:
>> >> On Wed, Nov 12, 2014 at 7:28 PM, Dave Chinner <david@fromorbit.com> wrote:
>> >> > On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
>> >> >> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
>> >> >> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
>> >> >> >> To clarify what Christoph wrote, XFS updates i_version is updated
>> >> >> >> once per transaction that modifies the inode. So if a VFS level
>> >> >> >> operation results in multiple transactions then each transaction
>> >> >> >> will but the version.
>> >> >> >>
>> >> >> >> It was implemented that way because nobody could tell me what the
>> >> >> >> actual granularity requirement for change detection was.  Hence what
>> >> >> >> I implemented was "be able to detect any persistent change that is
>> >> >> >> made" to cover all bases.
>> >
>> > FWIW, ext4 takes the same approach.  See Ted's post today:
>> >
>> > http://www.spinics.net/lists/linux-ext4/msg46194.html
>> >
>> > "The inode_inc_iversion() in mark4_ext4_iloc_dirty() is probably not
>> > necessary, since we already should be incrementing i_version whenever
>> > ctime and mtime gets updated.  The inode_inc_iversion() there is more
>> > of a "belt and suspenders" safety thing, on the theory that the extra
>> > bump in i_version won't hurt anything."
>> >
>>
>> It will hurt if it causes all the NFS clients to blow their caches
>> unnecessarily.
>
> Not my problem. We've just implemented what we were asked to
> implement.
>
>> Who asked for this?
>
> The only discussion where actual specifications were enumerated was
> during a thread about using i_version in the integrity measurement
> code (IMA subsystem). The NFSv4 requirements for the change counter
> were expressed here:
>
> https://lkml.org/lkml/2012/1/5/408
>
> Don't blame us for implementing the vague "changes every time"
> requirements in a way that results in no chance of a persistent
> change to either data or metadata being missed by the filesystem.

I'm not blaming anyone. I'm stating that I'm not aware of anybody who
needs to trace fiemap changes via the change attribute, and so I'm
asking where that requirement came from?
The NFSv4 requirements are specific if you take them within the
context of NFS, which is all about clients not having to care about
whether the data is stored on SSD block devices or deduped stone
tablets. Fiemap, being block specific, is irrelevant to that
environment.

So do the IMA folks use i_version and, if so, how are they using it?

-- 
Trond Myklebust

Linux NFS client maintainer, PrimaryData

trond.myklebust@primarydata.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-14 14:22                           ` Trond Myklebust
@ 2014-11-15  1:24                             ` Dave Chinner
  0 siblings, 0 replies; 24+ messages in thread
From: Dave Chinner @ 2014-11-15  1:24 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. Bruce Fields, Linux NFS Mailing List

On Fri, Nov 14, 2014 at 09:22:14AM -0500, Trond Myklebust wrote:
> On Thu, Nov 13, 2014 at 11:35 PM, Dave Chinner <david@fromorbit.com> wrote:
> > On Thu, Nov 13, 2014 at 07:43:33PM -0500, Trond Myklebust wrote:
> >> On Thu, Nov 13, 2014 at 6:54 PM, Dave Chinner <david@fromorbit.com> wrote:
> >> > On Thu, Nov 13, 2014 at 08:02:43AM -0500, Trond Myklebust wrote:
> >> >> On Wed, Nov 12, 2014 at 7:28 PM, Dave Chinner <david@fromorbit.com> wrote:
> >> >> > On Wed, Nov 12, 2014 at 09:26:16AM -0500, Trond Myklebust wrote:
> >> >> >> On Wed, Nov 12, 2014 at 5:24 AM, Christoph Hellwig <hch@lst.de> wrote:
> >> >> >> > On Wed, Nov 12, 2014 at 09:27:10AM +1100, Dave Chinner wrote:
> >> >> >> >> To clarify what Christoph wrote, XFS updates i_version is updated
> >> >> >> >> once per transaction that modifies the inode. So if a VFS level
> >> >> >> >> operation results in multiple transactions then each transaction
> >> >> >> >> will but the version.
> >> >> >> >>
> >> >> >> >> It was implemented that way because nobody could tell me what the
> >> >> >> >> actual granularity requirement for change detection was.  Hence what
> >> >> >> >> I implemented was "be able to detect any persistent change that is
> >> >> >> >> made" to cover all bases.
> >> >
> >> > FWIW, ext4 takes the same approach.  See Ted's post today:
> >> >
> >> > http://www.spinics.net/lists/linux-ext4/msg46194.html
> >> >
> >> > "The inode_inc_iversion() in mark4_ext4_iloc_dirty() is probably not
> >> > necessary, since we already should be incrementing i_version whenever
> >> > ctime and mtime gets updated.  The inode_inc_iversion() there is more
> >> > of a "belt and suspenders" safety thing, on the theory that the extra
> >> > bump in i_version won't hurt anything."
> >> >
> >>
> >> It will hurt if it causes all the NFS clients to blow their caches
> >> unnecessarily.
> >
> > Not my problem. We've just implemented what we were asked to
> > implement.
> >
> >> Who asked for this?
> >
> > The only discussion where actual specifications were enumerated was
> > during a thread about using i_version in the integrity measurement
> > code (IMA subsystem). The NFSv4 requirements for the change counter
> > were expressed here:
> >
> > https://lkml.org/lkml/2012/1/5/408
> >
> > Don't blame us for implementing the vague "changes every time"
> > requirements in a way that results in no chance of a persistent
> > change to either data or metadata being missed by the filesystem.
> 
> I'm not blaming anyone. I'm stating that I'm not aware of anybody who
> needs to trace fiemap changes via the change attribute, and so I'm
> asking where that requirement came from?

It was seriously being considered - it appeared as a potential in
NFSv4 draft specs for handling sparse file reads. Indeed, this draft
directly mentions reading block maps from XFS and using it on the
client side:

http://tools.ietf.org/html/draft-hildebrand-nfsv4-read-sparse-00

"XFS supports the XFS_IOC_GETBMAP extended attribute, which returns
the allocation information for a file. Clients can then use this
information to only read allocated data blocks"

Now, I know that was from 2010, and the eventual 2014 NFSv4.2 RFC
doesn't have this in it, but go back 3-4 years ago when we were
trying to work out to how make an on-disk version counter work
sanely for all the different things we'd been hearing about were
going to be necessary for NFSv4....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-11-11 21:42       ` J. Bruce Fields
@ 2014-12-01 19:50         ` J. Bruce Fields
  2014-12-02 17:23           ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: J. Bruce Fields @ 2014-12-01 19:50 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Trond Myklebust, linux-nfs

On Tue, Nov 11, 2014 at 04:42:06PM -0500, J. Bruce Fields wrote:
> On Tue, Nov 11, 2014 at 11:27:35AM +0100, Christoph Hellwig wrote:
> > On Mon, Nov 10, 2014 at 12:54:24PM -0500, J. Bruce Fields wrote:
> > > Shouldn't that be NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR?
> > > 
> > > The draft says that e.g. "If the client sees
> > > NFS4_CHANGE_TYPE_IS_VERSION_COUNTER, it has the ability to predict what
> > > the resulting change attribute value should be after a COMPOUND
> > > containing a SETATTR, WRITE, or CREATE."
> > > 
> > > Admittedly, I'm not completely sure what that means.  (Is a SETATTR of
> > > multiple attributes a single atomic change?  Can we predict the change
> > > attribute on a newly created file, or only on the parent directory?)  I
> > > also don't know where the filesystems do the i_version increment (can we
> > > guarantee it happens once per nfs WRITE?).
> > 
> > Actually the server may increment it many times for a single WRITE,
> > for XFS it is incremented for each dirty transaction, which could
> > happen many times during a single write:
> > 
> >  (1) c/mtime update
> >  (2) suid/sgid bit removal
> >  (3) block allocation (could be multiple transactions)
> > 
> > 
> > So I guess we really should move to NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR
> > instead.
> 
> I'm applying your patch for 3.19 with that one-line change.

Actually, I'm having second thoughts given that there still seems to be
some argument about whether the change_attr_type thing is completely
right.  Maybe that only affects values other than
CHANGE_TYPE_IS_MONOTIC_INCR, but is there any urgency if we don't have a
client user for it yet?

--b.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] nfsd: implement chage_attr_type attribute
  2014-12-01 19:50         ` J. Bruce Fields
@ 2014-12-02 17:23           ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-12-02 17:23 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Christoph Hellwig, Trond Myklebust, linux-nfs

On Mon, Dec 01, 2014 at 02:50:21PM -0500, J. Bruce Fields wrote:
> Actually, I'm having second thoughts given that there still seems to be
> some argument about whether the change_attr_type thing is completely
> right.  Maybe that only affects values other than
> CHANGE_TYPE_IS_MONOTIC_INCR, but is there any urgency if we don't have a
> client user for it yet?

I don't see a real urgency, just wanted to make sure we have reference
implementations for more of 4.2 available.  I'd defintively be curious
about how the client could make use of these.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2014-12-02 17:23 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-11-08 12:11 nfsd: add support for the chage_attr_type attribute Christoph Hellwig
2014-11-08 12:11 ` [PATCH 1/2] nfsd: correctly define v4.2 support attributes Christoph Hellwig
2014-11-10 22:21   ` J. Bruce Fields
2014-11-11 10:22     ` Christoph Hellwig
2014-11-08 12:11 ` [PATCH 2/2] nfsd: implement chage_attr_type attribute Christoph Hellwig
2014-11-10 17:54   ` J. Bruce Fields
2014-11-11 10:27     ` Christoph Hellwig
2014-11-11 12:36       ` Trond Myklebust
2014-11-11 16:27         ` Christoph Hellwig
2014-11-11 22:27           ` Dave Chinner
2014-11-12 10:24             ` Christoph Hellwig
2014-11-12 14:26               ` Trond Myklebust
2014-11-13  0:28                 ` Dave Chinner
2014-11-13 13:02                   ` Trond Myklebust
2014-11-13 21:47                     ` Dave Chinner
2014-11-13 23:54                     ` Dave Chinner
2014-11-14  0:43                       ` Trond Myklebust
2014-11-14  4:35                         ` Dave Chinner
2014-11-14 14:22                           ` Trond Myklebust
2014-11-15  1:24                             ` Dave Chinner
2014-11-11 21:42       ` J. Bruce Fields
2014-12-01 19:50         ` J. Bruce Fields
2014-12-02 17:23           ` Christoph Hellwig
2014-11-10 21:42 ` nfsd: add support for the " J. Bruce Fields

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.