All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease
@ 2022-07-29 16:47 Jeff Layton
  2022-07-29 16:47 ` [PATCH 2/3] nfsd: print nf pointer in some nfsd_file tracepoints Jeff Layton
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Jeff Layton @ 2022-07-29 16:47 UTC (permalink / raw)
  To: chuck.lever; +Cc: linux-nfs

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/trace.h | 16 ++++++++++++++++
 fs/nfsd/vfs.c   |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 96bb6629541e..38fb1ca31010 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -531,6 +531,22 @@ DEFINE_STATEID_EVENT(open);
 DEFINE_STATEID_EVENT(deleg_read);
 DEFINE_STATEID_EVENT(deleg_recall);
 
+TRACE_EVENT(nfsd_open_break_lease,
+	TP_PROTO(struct inode *inode,
+		 int access),
+	TP_ARGS(inode, access),
+	TP_STRUCT__entry(
+		__field(void *, inode)
+		__field(int, access)
+	),
+	TP_fast_assign(
+		__entry->inode = inode;
+		__entry->access = access;
+	),
+	TP_printk("inode=%p access=%s", __entry->inode,
+		  show_nfsd_may_flags(__entry->access))
+)
+
 DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
 	TP_PROTO(u32 seqid, const stateid_t *stp),
 	TP_ARGS(seqid, stp),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d79db56475d4..0edfe6ff7d22 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -729,6 +729,8 @@ int nfsd_open_break_lease(struct inode *inode, int access)
 {
 	unsigned int mode;
 
+	trace_nfsd_open_break_lease(inode, access);
+
 	if (access & NFSD_MAY_NOT_BREAK_LEASE)
 		return 0;
 	mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
-- 
2.37.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/3] nfsd: print nf pointer in some nfsd_file tracepoints
  2022-07-29 16:47 [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease Jeff Layton
@ 2022-07-29 16:47 ` Jeff Layton
  2022-07-29 16:47 ` [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags Jeff Layton
  2022-07-29 17:20 ` [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease Chuck Lever III
  2 siblings, 0 replies; 10+ messages in thread
From: Jeff Layton @ 2022-07-29 16:47 UTC (permalink / raw)
  To: chuck.lever; +Cc: linux-nfs

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/trace.h | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 38fb1ca31010..e9c5d0f56977 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -803,19 +803,21 @@ TRACE_EVENT(nfsd_file_alloc,
 	),
 	TP_ARGS(nf),
 	TP_STRUCT__entry(
+		__field(const void *, nf)
 		__field(const void *, nf_inode)
 		__field(unsigned long, nf_flags)
 		__field(unsigned long, nf_may)
 		__field(unsigned int, nf_ref)
 	),
 	TP_fast_assign(
+		__entry->nf = nf;
 		__entry->nf_inode = nf->nf_inode;
 		__entry->nf_flags = nf->nf_flags;
 		__entry->nf_ref = refcount_read(&nf->nf_ref);
 		__entry->nf_may = nf->nf_may;
 	),
-	TP_printk("inode=%p ref=%u flags=%s may=%s",
-		__entry->nf_inode, __entry->nf_ref,
+	TP_printk("nf=%p inode=%p ref=%u flags=%s may=%s",
+		__entry->nf, __entry->nf_inode, __entry->nf_ref,
 		show_nf_flags(__entry->nf_flags),
 		show_nfsd_may_flags(__entry->nf_may)
 	)
@@ -834,6 +836,7 @@ TRACE_EVENT(nfsd_file_acquire,
 
 	TP_STRUCT__entry(
 		__field(u32, xid)
+		__field(const void *, nf)
 		__field(const void *, inode)
 		__field(unsigned long, may_flags)
 		__field(unsigned int, nf_ref)
@@ -845,6 +848,7 @@ TRACE_EVENT(nfsd_file_acquire,
 
 	TP_fast_assign(
 		__entry->xid = be32_to_cpu(rqstp->rq_xid);
+		__entry->nf = nf;
 		__entry->inode = inode;
 		__entry->may_flags = may_flags;
 		__entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
@@ -854,8 +858,8 @@ TRACE_EVENT(nfsd_file_acquire,
 		__entry->status = be32_to_cpu(status);
 	),
 
-	TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u",
-			__entry->xid, __entry->inode,
+	TP_printk("xid=0x%x nf=%p inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u",
+			__entry->xid, __entry->nf, __entry->inode,
 			show_nfsd_may_flags(__entry->may_flags),
 			__entry->nf_ref, show_nf_flags(__entry->nf_flags),
 			show_nfsd_may_flags(__entry->nf_may),
@@ -873,6 +877,7 @@ TRACE_EVENT(nfsd_file_create,
 	TP_ARGS(rqstp, may_flags, nf),
 
 	TP_STRUCT__entry(
+		__field(const void *, nf)
 		__field(const void *, nf_inode)
 		__field(const void *, nf_file)
 		__field(unsigned long, may_flags)
@@ -883,6 +888,7 @@ TRACE_EVENT(nfsd_file_create,
 	),
 
 	TP_fast_assign(
+		__entry->nf = nf;
 		__entry->nf_inode = nf->nf_inode;
 		__entry->nf_file = nf->nf_file;
 		__entry->may_flags = may_flags;
@@ -892,8 +898,8 @@ TRACE_EVENT(nfsd_file_create,
 		__entry->xid = be32_to_cpu(rqstp->rq_xid);
 	),
 
-	TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
-		__entry->xid, __entry->nf_inode,
+	TP_printk("xid=0x%x nf=%p inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
+		__entry->xid, __entry->nf, __entry->nf_inode,
 		show_nfsd_may_flags(__entry->may_flags),
 		__entry->nf_ref, show_nf_flags(__entry->nf_flags),
 		show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
@@ -1060,19 +1066,21 @@ DECLARE_EVENT_CLASS(nfsd_file_gc_class,
 	),
 	TP_ARGS(nf),
 	TP_STRUCT__entry(
-		__field(void *, nf_inode)
-		__field(void *, nf_file)
+		__field(const void *, nf)
+		__field(const void *, nf_inode)
+		__field(const void *, nf_file)
 		__field(int, nf_ref)
 		__field(unsigned long, nf_flags)
 	),
 	TP_fast_assign(
+		__entry->nf = nf;
 		__entry->nf_inode = nf->nf_inode;
 		__entry->nf_file = nf->nf_file;
 		__entry->nf_ref = refcount_read(&nf->nf_ref);
 		__entry->nf_flags = nf->nf_flags;
 	),
-	TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p",
-		__entry->nf_inode, __entry->nf_ref,
+	TP_printk("nf=%p inode=%p ref=%d nf_flags=%s nf_file=%p",
+		__entry->nf, __entry->nf_inode, __entry->nf_ref,
 		show_nf_flags(__entry->nf_flags),
 		__entry->nf_file
 	)
-- 
2.37.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags
  2022-07-29 16:47 [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease Jeff Layton
  2022-07-29 16:47 ` [PATCH 2/3] nfsd: print nf pointer in some nfsd_file tracepoints Jeff Layton
@ 2022-07-29 16:47 ` Jeff Layton
  2022-07-29 17:21   ` Chuck Lever III
  2022-07-29 17:20 ` [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease Chuck Lever III
  2 siblings, 1 reply; 10+ messages in thread
From: Jeff Layton @ 2022-07-29 16:47 UTC (permalink / raw)
  To: chuck.lever; +Cc: linux-nfs, Olga Kornieskaia

We had a report from the spring Bake-a-thon of data corruption in some
nfstest_interop tests. Looking at the traces showed the NFS server
allowing a v3 WRITE to proceed while a read delegation was still
outstanding.

Currently, we only set NFSD_FILE_BREAK_* flags if
NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
COMMIT ops, where we need a writeable filehandle but don't need to
break read leases.

It doesn't make any sense to consult that flag when allocating a file
since the file may be used on subsequent calls where we do want to break
the lease (and the usage of it here seems to be reverse from what it
should be anyway).

Also, after calling nfsd_open_break_lease, we don't want to clear the
BREAK_* bits. A lease could end up being set on it later (more than
once) and we need to be able to break those leases as well.

This means that the NFSD_FILE_BREAK_* flags now just mirror
NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
drop those flags and unconditionally call nfsd_open_break_lease every
time.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
Reported-by: Olga Kornieskaia <kolga@netapp.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/filecache.c | 26 +++-----------------------
 fs/nfsd/filecache.h |  4 +---
 fs/nfsd/trace.h     |  2 --
 3 files changed, 4 insertions(+), 28 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 4758c2a3fcf8..7e566ddca388 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -283,7 +283,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
 }
 
 static struct nfsd_file *
-nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+nfsd_file_alloc(struct nfsd_file_lookup_key *key)
 {
 	struct nfsd_file *nf;
 
@@ -301,12 +301,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
 		/* nf_ref is pre-incremented for hash table */
 		refcount_set(&nf->nf_ref, 2);
 		nf->nf_may = key->need;
-		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
-			if (may & NFSD_MAY_WRITE)
-				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
-			if (may & NFSD_MAY_READ)
-				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
-		}
 		nf->nf_mark = NULL;
 	}
 	return nf;
@@ -1090,7 +1084,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (nf)
 		goto wait_for_construction;
 
-	new = nfsd_file_alloc(&key, may_flags);
+	new = nfsd_file_alloc(&key);
 	if (!new) {
 		status = nfserr_jukebox;
 		goto out_status;
@@ -1130,21 +1124,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	nfsd_file_lru_remove(nf);
 	this_cpu_inc(nfsd_file_cache_hits);
 
-	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
-		bool write = (may_flags & NFSD_MAY_WRITE);
-
-		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
-		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
-			status = nfserrno(nfsd_open_break_lease(
-					file_inode(nf->nf_file), may_flags));
-			if (status == nfs_ok) {
-				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
-				if (write)
-					clear_bit(NFSD_FILE_BREAK_WRITE,
-						  &nf->nf_flags);
-			}
-		}
-	}
+	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
 out:
 	if (status == nfs_ok) {
 		if (open)
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index d534b76cb65b..8e8c0c47d67d 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -37,9 +37,7 @@ struct nfsd_file {
 	struct net		*nf_net;
 #define NFSD_FILE_HASHED	(0)
 #define NFSD_FILE_PENDING	(1)
-#define NFSD_FILE_BREAK_READ	(2)
-#define NFSD_FILE_BREAK_WRITE	(3)
-#define NFSD_FILE_REFERENCED	(4)
+#define NFSD_FILE_REFERENCED	(2)
 	unsigned long		nf_flags;
 	struct inode		*nf_inode;	/* don't deref */
 	refcount_t		nf_ref;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index e9c5d0f56977..2bd867a96eba 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -758,8 +758,6 @@ DEFINE_CLID_EVENT(confirmed_r);
 	__print_flags(val, "|",						\
 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
-		{ 1 << NFSD_FILE_BREAK_READ,	"BREAK_READ" },		\
-		{ 1 << NFSD_FILE_BREAK_WRITE,	"BREAK_WRITE" },	\
 		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"})
 
 DECLARE_EVENT_CLASS(nfsd_file_class,
-- 
2.37.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease
  2022-07-29 16:47 [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease Jeff Layton
  2022-07-29 16:47 ` [PATCH 2/3] nfsd: print nf pointer in some nfsd_file tracepoints Jeff Layton
  2022-07-29 16:47 ` [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags Jeff Layton
@ 2022-07-29 17:20 ` Chuck Lever III
  2 siblings, 0 replies; 10+ messages in thread
From: Chuck Lever III @ 2022-07-29 17:20 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List



> On Jul 29, 2022, at 12:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
> fs/nfsd/trace.h | 16 ++++++++++++++++
> fs/nfsd/vfs.c   |  2 ++
> 2 files changed, 18 insertions(+)
> 
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 96bb6629541e..38fb1ca31010 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -531,6 +531,22 @@ DEFINE_STATEID_EVENT(open);
> DEFINE_STATEID_EVENT(deleg_read);
> DEFINE_STATEID_EVENT(deleg_recall);
> 
> +TRACE_EVENT(nfsd_open_break_lease,
> +	TP_PROTO(struct inode *inode,
> +		 int access),
> +	TP_ARGS(inode, access),
> +	TP_STRUCT__entry(
> +		__field(void *, inode)
> +		__field(int, access)

trace_print_symbols_seq() takes an unsigned long,
so I prefer to store values that are passed to it in
an unsigned long field to make the type conversion
more obvious to readers.


> +	),
> +	TP_fast_assign(
> +		__entry->inode = inode;
> +		__entry->access = access;
> +	),
> +	TP_printk("inode=%p access=%s", __entry->inode,
> +		  show_nfsd_may_flags(__entry->access))
> +)
> +
> DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
> 	TP_PROTO(u32 seqid, const stateid_t *stp),
> 	TP_ARGS(seqid, stp),
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index d79db56475d4..0edfe6ff7d22 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -729,6 +729,8 @@ int nfsd_open_break_lease(struct inode *inode, int access)
> {
> 	unsigned int mode;
> 
> +	trace_nfsd_open_break_lease(inode, access);
> +

IMO this tracepoint should include the incoming XID as well.
I checked: each caller of nfsd_open_break_lease() has an
svc_rqst pointer to pass in here, and that can be passed
along to the tracepoint.

Thus if we're going to change the synopsis of
nfsd_open_break_lease() perhaps it would be better to return
a __be32 nfserr status, since every one of its call sites
appears to convert the returned hosterr value to an nfserr.


> 	if (access & NFSD_MAY_NOT_BREAK_LEASE)
> 		return 0;
> 	mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
> -- 
> 2.37.1
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags
  2022-07-29 16:47 ` [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags Jeff Layton
@ 2022-07-29 17:21   ` Chuck Lever III
  2022-07-29 17:34     ` Jeff Layton
  0 siblings, 1 reply; 10+ messages in thread
From: Chuck Lever III @ 2022-07-29 17:21 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List, Olga Kornievskaia



> On Jul 29, 2022, at 12:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> We had a report from the spring Bake-a-thon of data corruption in some
> nfstest_interop tests. Looking at the traces showed the NFS server
> allowing a v3 WRITE to proceed while a read delegation was still
> outstanding.
> 
> Currently, we only set NFSD_FILE_BREAK_* flags if
> NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
> NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
> COMMIT ops, where we need a writeable filehandle but don't need to
> break read leases.
> 
> It doesn't make any sense to consult that flag when allocating a file
> since the file may be used on subsequent calls where we do want to break
> the lease (and the usage of it here seems to be reverse from what it
> should be anyway).
> 
> Also, after calling nfsd_open_break_lease, we don't want to clear the
> BREAK_* bits. A lease could end up being set on it later (more than
> once) and we need to be able to break those leases as well.
> 
> This means that the NFSD_FILE_BREAK_* flags now just mirror
> NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
> drop those flags and unconditionally call nfsd_open_break_lease every
> time.
> 
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
> Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
> Reported-by: Olga Kornieskaia <kolga@netapp.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

I'm going to go out on a limb and predict this will conflict
heavily with the filecache overhaul patches I have queued for
next. :-)

Do you believe this is something that urgently needs to be
backported to stable kernels, or can it be rebased on top of
the filecache overhaul work?


> ---
> fs/nfsd/filecache.c | 26 +++-----------------------
> fs/nfsd/filecache.h |  4 +---
> fs/nfsd/trace.h     |  2 --
> 3 files changed, 4 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index 4758c2a3fcf8..7e566ddca388 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -283,7 +283,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
> }
> 
> static struct nfsd_file *
> -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> +nfsd_file_alloc(struct nfsd_file_lookup_key *key)
> {
> 	struct nfsd_file *nf;
> 
> @@ -301,12 +301,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> 		/* nf_ref is pre-incremented for hash table */
> 		refcount_set(&nf->nf_ref, 2);
> 		nf->nf_may = key->need;
> -		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
> -			if (may & NFSD_MAY_WRITE)
> -				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
> -			if (may & NFSD_MAY_READ)
> -				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
> -		}
> 		nf->nf_mark = NULL;
> 	}
> 	return nf;
> @@ -1090,7 +1084,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> 	if (nf)
> 		goto wait_for_construction;
> 
> -	new = nfsd_file_alloc(&key, may_flags);
> +	new = nfsd_file_alloc(&key);
> 	if (!new) {
> 		status = nfserr_jukebox;
> 		goto out_status;
> @@ -1130,21 +1124,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> 	nfsd_file_lru_remove(nf);
> 	this_cpu_inc(nfsd_file_cache_hits);
> 
> -	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
> -		bool write = (may_flags & NFSD_MAY_WRITE);
> -
> -		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
> -		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
> -			status = nfserrno(nfsd_open_break_lease(
> -					file_inode(nf->nf_file), may_flags));
> -			if (status == nfs_ok) {
> -				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
> -				if (write)
> -					clear_bit(NFSD_FILE_BREAK_WRITE,
> -						  &nf->nf_flags);
> -			}
> -		}
> -	}
> +	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
> out:
> 	if (status == nfs_ok) {
> 		if (open)
> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> index d534b76cb65b..8e8c0c47d67d 100644
> --- a/fs/nfsd/filecache.h
> +++ b/fs/nfsd/filecache.h
> @@ -37,9 +37,7 @@ struct nfsd_file {
> 	struct net		*nf_net;
> #define NFSD_FILE_HASHED	(0)
> #define NFSD_FILE_PENDING	(1)
> -#define NFSD_FILE_BREAK_READ	(2)
> -#define NFSD_FILE_BREAK_WRITE	(3)
> -#define NFSD_FILE_REFERENCED	(4)
> +#define NFSD_FILE_REFERENCED	(2)
> 	unsigned long		nf_flags;
> 	struct inode		*nf_inode;	/* don't deref */
> 	refcount_t		nf_ref;
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index e9c5d0f56977..2bd867a96eba 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -758,8 +758,6 @@ DEFINE_CLID_EVENT(confirmed_r);
> 	__print_flags(val, "|",						\
> 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
> 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
> -		{ 1 << NFSD_FILE_BREAK_READ,	"BREAK_READ" },		\
> -		{ 1 << NFSD_FILE_BREAK_WRITE,	"BREAK_WRITE" },	\
> 		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"})
> 
> DECLARE_EVENT_CLASS(nfsd_file_class,
> -- 
> 2.37.1
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags
  2022-07-29 17:21   ` Chuck Lever III
@ 2022-07-29 17:34     ` Jeff Layton
  2022-07-29 17:46       ` Chuck Lever III
  0 siblings, 1 reply; 10+ messages in thread
From: Jeff Layton @ 2022-07-29 17:34 UTC (permalink / raw)
  To: Chuck Lever III; +Cc: Linux NFS Mailing List, Olga Kornievskaia

On Fri, 2022-07-29 at 17:21 +0000, Chuck Lever III wrote:
> 
> > On Jul 29, 2022, at 12:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > We had a report from the spring Bake-a-thon of data corruption in some
> > nfstest_interop tests. Looking at the traces showed the NFS server
> > allowing a v3 WRITE to proceed while a read delegation was still
> > outstanding.
> > 
> > Currently, we only set NFSD_FILE_BREAK_* flags if
> > NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
> > NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
> > COMMIT ops, where we need a writeable filehandle but don't need to
> > break read leases.
> > 
> > It doesn't make any sense to consult that flag when allocating a file
> > since the file may be used on subsequent calls where we do want to break
> > the lease (and the usage of it here seems to be reverse from what it
> > should be anyway).
> > 
> > Also, after calling nfsd_open_break_lease, we don't want to clear the
> > BREAK_* bits. A lease could end up being set on it later (more than
> > once) and we need to be able to break those leases as well.
> > 
> > This means that the NFSD_FILE_BREAK_* flags now just mirror
> > NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
> > drop those flags and unconditionally call nfsd_open_break_lease every
> > time.
> > 
> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
> > Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
> > Reported-by: Olga Kornieskaia <kolga@netapp.com>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> 
> I'm going to go out on a limb and predict this will conflict
> heavily with the filecache overhaul patches I have queued for
> next. :-)
> 
> Do you believe this is something that urgently needs to be
> backported to stable kernels, or can it be rebased on top of
> the filecache overhaul work?
> 
> 

I based this on top of your for-next branch and I think the filecache is
already in there.

It's a pretty nasty bug that we probably will want backported, so it
might make sense to respin this on top of mainline and put it in ahead
of the filecache overhaul.

Thoughts?


> > ---
> > fs/nfsd/filecache.c | 26 +++-----------------------
> > fs/nfsd/filecache.h |  4 +---
> > fs/nfsd/trace.h     |  2 --
> > 3 files changed, 4 insertions(+), 28 deletions(-)
> > 
> > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> > index 4758c2a3fcf8..7e566ddca388 100644
> > --- a/fs/nfsd/filecache.c
> > +++ b/fs/nfsd/filecache.c
> > @@ -283,7 +283,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
> > }
> > 
> > static struct nfsd_file *
> > -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> > +nfsd_file_alloc(struct nfsd_file_lookup_key *key)
> > {
> > 	struct nfsd_file *nf;
> > 
> > @@ -301,12 +301,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> > 		/* nf_ref is pre-incremented for hash table */
> > 		refcount_set(&nf->nf_ref, 2);
> > 		nf->nf_may = key->need;
> > -		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
> > -			if (may & NFSD_MAY_WRITE)
> > -				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
> > -			if (may & NFSD_MAY_READ)
> > -				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
> > -		}
> > 		nf->nf_mark = NULL;
> > 	}
> > 	return nf;
> > @@ -1090,7 +1084,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > 	if (nf)
> > 		goto wait_for_construction;
> > 
> > -	new = nfsd_file_alloc(&key, may_flags);
> > +	new = nfsd_file_alloc(&key);
> > 	if (!new) {
> > 		status = nfserr_jukebox;
> > 		goto out_status;
> > @@ -1130,21 +1124,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > 	nfsd_file_lru_remove(nf);
> > 	this_cpu_inc(nfsd_file_cache_hits);
> > 
> > -	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
> > -		bool write = (may_flags & NFSD_MAY_WRITE);
> > -
> > -		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
> > -		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
> > -			status = nfserrno(nfsd_open_break_lease(
> > -					file_inode(nf->nf_file), may_flags));
> > -			if (status == nfs_ok) {
> > -				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
> > -				if (write)
> > -					clear_bit(NFSD_FILE_BREAK_WRITE,
> > -						  &nf->nf_flags);
> > -			}
> > -		}
> > -	}
> > +	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
> > out:
> > 	if (status == nfs_ok) {
> > 		if (open)
> > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> > index d534b76cb65b..8e8c0c47d67d 100644
> > --- a/fs/nfsd/filecache.h
> > +++ b/fs/nfsd/filecache.h
> > @@ -37,9 +37,7 @@ struct nfsd_file {
> > 	struct net		*nf_net;
> > #define NFSD_FILE_HASHED	(0)
> > #define NFSD_FILE_PENDING	(1)
> > -#define NFSD_FILE_BREAK_READ	(2)
> > -#define NFSD_FILE_BREAK_WRITE	(3)
> > -#define NFSD_FILE_REFERENCED	(4)
> > +#define NFSD_FILE_REFERENCED	(2)
> > 	unsigned long		nf_flags;
> > 	struct inode		*nf_inode;	/* don't deref */
> > 	refcount_t		nf_ref;
> > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> > index e9c5d0f56977..2bd867a96eba 100644
> > --- a/fs/nfsd/trace.h
> > +++ b/fs/nfsd/trace.h
> > @@ -758,8 +758,6 @@ DEFINE_CLID_EVENT(confirmed_r);
> > 	__print_flags(val, "|",						\
> > 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
> > 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
> > -		{ 1 << NFSD_FILE_BREAK_READ,	"BREAK_READ" },		\
> > -		{ 1 << NFSD_FILE_BREAK_WRITE,	"BREAK_WRITE" },	\
> > 		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"})
> > 
> > DECLARE_EVENT_CLASS(nfsd_file_class,
> > -- 
> > 2.37.1
> > 
> 
> --
> Chuck Lever
> 
> 
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags
  2022-07-29 17:34     ` Jeff Layton
@ 2022-07-29 17:46       ` Chuck Lever III
  2022-07-29 17:55         ` Jeff Layton
  2022-07-29 18:13         ` Chuck Lever III
  0 siblings, 2 replies; 10+ messages in thread
From: Chuck Lever III @ 2022-07-29 17:46 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List, Olga Kornievskaia



> On Jul 29, 2022, at 1:34 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Fri, 2022-07-29 at 17:21 +0000, Chuck Lever III wrote:
>> 
>>> On Jul 29, 2022, at 12:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
>>> 
>>> We had a report from the spring Bake-a-thon of data corruption in some
>>> nfstest_interop tests. Looking at the traces showed the NFS server
>>> allowing a v3 WRITE to proceed while a read delegation was still
>>> outstanding.
>>> 
>>> Currently, we only set NFSD_FILE_BREAK_* flags if
>>> NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
>>> NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
>>> COMMIT ops, where we need a writeable filehandle but don't need to
>>> break read leases.
>>> 
>>> It doesn't make any sense to consult that flag when allocating a file
>>> since the file may be used on subsequent calls where we do want to break
>>> the lease (and the usage of it here seems to be reverse from what it
>>> should be anyway).
>>> 
>>> Also, after calling nfsd_open_break_lease, we don't want to clear the
>>> BREAK_* bits. A lease could end up being set on it later (more than
>>> once) and we need to be able to break those leases as well.
>>> 
>>> This means that the NFSD_FILE_BREAK_* flags now just mirror
>>> NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
>>> drop those flags and unconditionally call nfsd_open_break_lease every
>>> time.
>>> 
>>> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
>>> Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
>>> Reported-by: Olga Kornieskaia <kolga@netapp.com>
>>> Signed-off-by: Jeff Layton <jlayton@kernel.org>
>> 
>> I'm going to go out on a limb and predict this will conflict
>> heavily with the filecache overhaul patches I have queued for
>> next. :-)
>> 
>> Do you believe this is something that urgently needs to be
>> backported to stable kernels, or can it be rebased on top of
>> the filecache overhaul work?
>> 
>> 
> 
> I based this on top of your for-next branch and I think the filecache is
> already in there.
> 
> It's a pretty nasty bug that we probably will want backported, so it
> might make sense to respin this on top of mainline and put it in ahead
> of the filecache overhaul.

I am a generally a proponent of enabling fix backports.

I encourage you to test the respin on 5.19 and 5.18 at least
because the moment that patch hits upstream, Sasha and Greg
will pull it into stable. I don't relish the idea of having
to fix the fix, if you catch my drift.

And perhaps when you repost, the fix should be reordered
before the patches that add the tracepoints.


> Thoughts?

Rebasing all that is mechanically straightforward to do.

The only issue is that the filecache work and the first PR
tag is already in my for-next branch. If you don't think it
will trigger massive heartburn for Linus and Stephen, I can
pull that stuff out now, and postpone my first PR until the
second week of the merge window.


>>> ---
>>> fs/nfsd/filecache.c | 26 +++-----------------------
>>> fs/nfsd/filecache.h |  4 +---
>>> fs/nfsd/trace.h     |  2 --
>>> 3 files changed, 4 insertions(+), 28 deletions(-)
>>> 
>>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>>> index 4758c2a3fcf8..7e566ddca388 100644
>>> --- a/fs/nfsd/filecache.c
>>> +++ b/fs/nfsd/filecache.c
>>> @@ -283,7 +283,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
>>> }
>>> 
>>> static struct nfsd_file *
>>> -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
>>> +nfsd_file_alloc(struct nfsd_file_lookup_key *key)
>>> {
>>> 	struct nfsd_file *nf;
>>> 
>>> @@ -301,12 +301,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
>>> 		/* nf_ref is pre-incremented for hash table */
>>> 		refcount_set(&nf->nf_ref, 2);
>>> 		nf->nf_may = key->need;
>>> -		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
>>> -			if (may & NFSD_MAY_WRITE)
>>> -				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
>>> -			if (may & NFSD_MAY_READ)
>>> -				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
>>> -		}
>>> 		nf->nf_mark = NULL;
>>> 	}
>>> 	return nf;
>>> @@ -1090,7 +1084,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>> 	if (nf)
>>> 		goto wait_for_construction;
>>> 
>>> -	new = nfsd_file_alloc(&key, may_flags);
>>> +	new = nfsd_file_alloc(&key);
>>> 	if (!new) {
>>> 		status = nfserr_jukebox;
>>> 		goto out_status;
>>> @@ -1130,21 +1124,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>> 	nfsd_file_lru_remove(nf);
>>> 	this_cpu_inc(nfsd_file_cache_hits);
>>> 
>>> -	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
>>> -		bool write = (may_flags & NFSD_MAY_WRITE);
>>> -
>>> -		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
>>> -		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
>>> -			status = nfserrno(nfsd_open_break_lease(
>>> -					file_inode(nf->nf_file), may_flags));
>>> -			if (status == nfs_ok) {
>>> -				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
>>> -				if (write)
>>> -					clear_bit(NFSD_FILE_BREAK_WRITE,
>>> -						  &nf->nf_flags);
>>> -			}
>>> -		}
>>> -	}
>>> +	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
>>> out:
>>> 	if (status == nfs_ok) {
>>> 		if (open)
>>> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
>>> index d534b76cb65b..8e8c0c47d67d 100644
>>> --- a/fs/nfsd/filecache.h
>>> +++ b/fs/nfsd/filecache.h
>>> @@ -37,9 +37,7 @@ struct nfsd_file {
>>> 	struct net		*nf_net;
>>> #define NFSD_FILE_HASHED	(0)
>>> #define NFSD_FILE_PENDING	(1)
>>> -#define NFSD_FILE_BREAK_READ	(2)
>>> -#define NFSD_FILE_BREAK_WRITE	(3)
>>> -#define NFSD_FILE_REFERENCED	(4)
>>> +#define NFSD_FILE_REFERENCED	(2)
>>> 	unsigned long		nf_flags;
>>> 	struct inode		*nf_inode;	/* don't deref */
>>> 	refcount_t		nf_ref;
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index e9c5d0f56977..2bd867a96eba 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -758,8 +758,6 @@ DEFINE_CLID_EVENT(confirmed_r);
>>> 	__print_flags(val, "|",						\
>>> 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
>>> 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
>>> -		{ 1 << NFSD_FILE_BREAK_READ,	"BREAK_READ" },		\
>>> -		{ 1 << NFSD_FILE_BREAK_WRITE,	"BREAK_WRITE" },	\
>>> 		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"})
>>> 
>>> DECLARE_EVENT_CLASS(nfsd_file_class,
>>> -- 
>>> 2.37.1
>>> 
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@kernel.org>

--
Chuck Lever




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags
  2022-07-29 17:46       ` Chuck Lever III
@ 2022-07-29 17:55         ` Jeff Layton
  2022-07-29 17:57           ` Chuck Lever III
  2022-07-29 18:13         ` Chuck Lever III
  1 sibling, 1 reply; 10+ messages in thread
From: Jeff Layton @ 2022-07-29 17:55 UTC (permalink / raw)
  To: Chuck Lever III; +Cc: Linux NFS Mailing List, Olga Kornievskaia

On Fri, 2022-07-29 at 17:46 +0000, Chuck Lever III wrote:
> 
> > On Jul 29, 2022, at 1:34 PM, Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > On Fri, 2022-07-29 at 17:21 +0000, Chuck Lever III wrote:
> > > 
> > > > On Jul 29, 2022, at 12:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
> > > > 
> > > > We had a report from the spring Bake-a-thon of data corruption in some
> > > > nfstest_interop tests. Looking at the traces showed the NFS server
> > > > allowing a v3 WRITE to proceed while a read delegation was still
> > > > outstanding.
> > > > 
> > > > Currently, we only set NFSD_FILE_BREAK_* flags if
> > > > NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
> > > > NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
> > > > COMMIT ops, where we need a writeable filehandle but don't need to
> > > > break read leases.
> > > > 
> > > > It doesn't make any sense to consult that flag when allocating a file
> > > > since the file may be used on subsequent calls where we do want to break
> > > > the lease (and the usage of it here seems to be reverse from what it
> > > > should be anyway).
> > > > 
> > > > Also, after calling nfsd_open_break_lease, we don't want to clear the
> > > > BREAK_* bits. A lease could end up being set on it later (more than
> > > > once) and we need to be able to break those leases as well.
> > > > 
> > > > This means that the NFSD_FILE_BREAK_* flags now just mirror
> > > > NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
> > > > drop those flags and unconditionally call nfsd_open_break_lease every
> > > > time.
> > > > 
> > > > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
> > > > Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
> > > > Reported-by: Olga Kornieskaia <kolga@netapp.com>
> > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > 
> > > I'm going to go out on a limb and predict this will conflict
> > > heavily with the filecache overhaul patches I have queued for
> > > next. :-)
> > > 
> > > Do you believe this is something that urgently needs to be
> > > backported to stable kernels, or can it be rebased on top of
> > > the filecache overhaul work?
> > > 
> > > 
> > 
> > I based this on top of your for-next branch and I think the filecache is
> > already in there.
> > 
> > It's a pretty nasty bug that we probably will want backported, so it
> > might make sense to respin this on top of mainline and put it in ahead
> > of the filecache overhaul.
> 
> I am a generally a proponent of enabling fix backports.
> 
> I encourage you to test the respin on 5.19 and 5.18 at least
> because the moment that patch hits upstream, Sasha and Greg
> will pull it into stable. I don't relish the idea of having
> to fix the fix, if you catch my drift.
> 
> And perhaps when you repost, the fix should be reordered
> before the patches that add the tracepoints.
> 

That all sounds good. I'll do that in a bit, and will send a v2.

> 
> > Thoughts?
> 
> Rebasing all that is mechanically straightforward to do.
> 
> The only issue is that the filecache work and the first PR
> tag is already in my for-next branch. If you don't think it
> will trigger massive heartburn for Linus and Stephen, I can
> pull that stuff out now, and postpone my first PR until the
> second week of the merge window.
> 

That may be best. I think we want to see this fix go in almost
everywhere.

> 
> > > > ---
> > > > fs/nfsd/filecache.c | 26 +++-----------------------
> > > > fs/nfsd/filecache.h |  4 +---
> > > > fs/nfsd/trace.h     |  2 --
> > > > 3 files changed, 4 insertions(+), 28 deletions(-)
> > > > 
> > > > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> > > > index 4758c2a3fcf8..7e566ddca388 100644
> > > > --- a/fs/nfsd/filecache.c
> > > > +++ b/fs/nfsd/filecache.c
> > > > @@ -283,7 +283,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
> > > > }
> > > > 
> > > > static struct nfsd_file *
> > > > -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> > > > +nfsd_file_alloc(struct nfsd_file_lookup_key *key)
> > > > {
> > > > 	struct nfsd_file *nf;
> > > > 
> > > > @@ -301,12 +301,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> > > > 		/* nf_ref is pre-incremented for hash table */
> > > > 		refcount_set(&nf->nf_ref, 2);
> > > > 		nf->nf_may = key->need;
> > > > -		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
> > > > -			if (may & NFSD_MAY_WRITE)
> > > > -				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
> > > > -			if (may & NFSD_MAY_READ)
> > > > -				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
> > > > -		}
> > > > 		nf->nf_mark = NULL;
> > > > 	}
> > > > 	return nf;
> > > > @@ -1090,7 +1084,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > > > 	if (nf)
> > > > 		goto wait_for_construction;
> > > > 
> > > > -	new = nfsd_file_alloc(&key, may_flags);
> > > > +	new = nfsd_file_alloc(&key);
> > > > 	if (!new) {
> > > > 		status = nfserr_jukebox;
> > > > 		goto out_status;
> > > > @@ -1130,21 +1124,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > > > 	nfsd_file_lru_remove(nf);
> > > > 	this_cpu_inc(nfsd_file_cache_hits);
> > > > 
> > > > -	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
> > > > -		bool write = (may_flags & NFSD_MAY_WRITE);
> > > > -
> > > > -		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
> > > > -		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
> > > > -			status = nfserrno(nfsd_open_break_lease(
> > > > -					file_inode(nf->nf_file), may_flags));
> > > > -			if (status == nfs_ok) {
> > > > -				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
> > > > -				if (write)
> > > > -					clear_bit(NFSD_FILE_BREAK_WRITE,
> > > > -						  &nf->nf_flags);
> > > > -			}
> > > > -		}
> > > > -	}
> > > > +	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
> > > > out:
> > > > 	if (status == nfs_ok) {
> > > > 		if (open)
> > > > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> > > > index d534b76cb65b..8e8c0c47d67d 100644
> > > > --- a/fs/nfsd/filecache.h
> > > > +++ b/fs/nfsd/filecache.h
> > > > @@ -37,9 +37,7 @@ struct nfsd_file {
> > > > 	struct net		*nf_net;
> > > > #define NFSD_FILE_HASHED	(0)
> > > > #define NFSD_FILE_PENDING	(1)
> > > > -#define NFSD_FILE_BREAK_READ	(2)
> > > > -#define NFSD_FILE_BREAK_WRITE	(3)
> > > > -#define NFSD_FILE_REFERENCED	(4)
> > > > +#define NFSD_FILE_REFERENCED	(2)
> > > > 	unsigned long		nf_flags;
> > > > 	struct inode		*nf_inode;	/* don't deref */
> > > > 	refcount_t		nf_ref;
> > > > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> > > > index e9c5d0f56977..2bd867a96eba 100644
> > > > --- a/fs/nfsd/trace.h
> > > > +++ b/fs/nfsd/trace.h
> > > > @@ -758,8 +758,6 @@ DEFINE_CLID_EVENT(confirmed_r);
> > > > 	__print_flags(val, "|",						\
> > > > 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
> > > > 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
> > > > -		{ 1 << NFSD_FILE_BREAK_READ,	"BREAK_READ" },		\
> > > > -		{ 1 << NFSD_FILE_BREAK_WRITE,	"BREAK_WRITE" },	\
> > > > 		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"})
> > > > 
> > > > DECLARE_EVENT_CLASS(nfsd_file_class,
> > > > -- 
> > > > 2.37.1
> > > > 
> > > 
> > > --
> > > Chuck Lever
> > > 
> > > 
> > > 
> > 
> > -- 
> > Jeff Layton <jlayton@kernel.org>
> 
> --
> Chuck Lever
> 
> 
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags
  2022-07-29 17:55         ` Jeff Layton
@ 2022-07-29 17:57           ` Chuck Lever III
  0 siblings, 0 replies; 10+ messages in thread
From: Chuck Lever III @ 2022-07-29 17:57 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List, Olga Kornievskaia



> On Jul 29, 2022, at 1:55 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Fri, 2022-07-29 at 17:46 +0000, Chuck Lever III wrote:
>> 
>>> On Jul 29, 2022, at 1:34 PM, Jeff Layton <jlayton@kernel.org> wrote:
>>> 
>>> On Fri, 2022-07-29 at 17:21 +0000, Chuck Lever III wrote:
>>>> 
>>>>> On Jul 29, 2022, at 12:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
>>>>> 
>>>>> We had a report from the spring Bake-a-thon of data corruption in some
>>>>> nfstest_interop tests. Looking at the traces showed the NFS server
>>>>> allowing a v3 WRITE to proceed while a read delegation was still
>>>>> outstanding.
>>>>> 
>>>>> Currently, we only set NFSD_FILE_BREAK_* flags if
>>>>> NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
>>>>> NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
>>>>> COMMIT ops, where we need a writeable filehandle but don't need to
>>>>> break read leases.
>>>>> 
>>>>> It doesn't make any sense to consult that flag when allocating a file
>>>>> since the file may be used on subsequent calls where we do want to break
>>>>> the lease (and the usage of it here seems to be reverse from what it
>>>>> should be anyway).
>>>>> 
>>>>> Also, after calling nfsd_open_break_lease, we don't want to clear the
>>>>> BREAK_* bits. A lease could end up being set on it later (more than
>>>>> once) and we need to be able to break those leases as well.
>>>>> 
>>>>> This means that the NFSD_FILE_BREAK_* flags now just mirror
>>>>> NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
>>>>> drop those flags and unconditionally call nfsd_open_break_lease every
>>>>> time.
>>>>> 
>>>>> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
>>>>> Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
>>>>> Reported-by: Olga Kornieskaia <kolga@netapp.com>
>>>>> Signed-off-by: Jeff Layton <jlayton@kernel.org>
>>>> 
>>>> I'm going to go out on a limb and predict this will conflict
>>>> heavily with the filecache overhaul patches I have queued for
>>>> next. :-)
>>>> 
>>>> Do you believe this is something that urgently needs to be
>>>> backported to stable kernels, or can it be rebased on top of
>>>> the filecache overhaul work?
>>>> 
>>>> 
>>> 
>>> I based this on top of your for-next branch and I think the filecache is
>>> already in there.
>>> 
>>> It's a pretty nasty bug that we probably will want backported, so it
>>> might make sense to respin this on top of mainline and put it in ahead
>>> of the filecache overhaul.
>> 
>> I am a generally a proponent of enabling fix backports.
>> 
>> I encourage you to test the respin on 5.19 and 5.18 at least
>> because the moment that patch hits upstream, Sasha and Greg
>> will pull it into stable. I don't relish the idea of having
>> to fix the fix, if you catch my drift.
>> 
>> And perhaps when you repost, the fix should be reordered
>> before the patches that add the tracepoints.
>> 
> 
> That all sounds good. I'll do that in a bit, and will send a v2.
> 
>> 
>>> Thoughts?
>> 
>> Rebasing all that is mechanically straightforward to do.
>> 
>> The only issue is that the filecache work and the first PR
>> tag is already in my for-next branch. If you don't think it
>> will trigger massive heartburn for Linus and Stephen, I can
>> pull that stuff out now, and postpone my first PR until the
>> second week of the merge window.
>> 
> 
> That may be best. I think we want to see this fix go in almost
> everywhere.

I will push the removal update to my public tree now. You can
send your v2 over the weekend or on Monday.


>>>>> ---
>>>>> fs/nfsd/filecache.c | 26 +++-----------------------
>>>>> fs/nfsd/filecache.h |  4 +---
>>>>> fs/nfsd/trace.h     |  2 --
>>>>> 3 files changed, 4 insertions(+), 28 deletions(-)
>>>>> 
>>>>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>>>>> index 4758c2a3fcf8..7e566ddca388 100644
>>>>> --- a/fs/nfsd/filecache.c
>>>>> +++ b/fs/nfsd/filecache.c
>>>>> @@ -283,7 +283,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
>>>>> }
>>>>> 
>>>>> static struct nfsd_file *
>>>>> -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
>>>>> +nfsd_file_alloc(struct nfsd_file_lookup_key *key)
>>>>> {
>>>>> 	struct nfsd_file *nf;
>>>>> 
>>>>> @@ -301,12 +301,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
>>>>> 		/* nf_ref is pre-incremented for hash table */
>>>>> 		refcount_set(&nf->nf_ref, 2);
>>>>> 		nf->nf_may = key->need;
>>>>> -		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
>>>>> -			if (may & NFSD_MAY_WRITE)
>>>>> -				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
>>>>> -			if (may & NFSD_MAY_READ)
>>>>> -				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
>>>>> -		}
>>>>> 		nf->nf_mark = NULL;
>>>>> 	}
>>>>> 	return nf;
>>>>> @@ -1090,7 +1084,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>>> 	if (nf)
>>>>> 		goto wait_for_construction;
>>>>> 
>>>>> -	new = nfsd_file_alloc(&key, may_flags);
>>>>> +	new = nfsd_file_alloc(&key);
>>>>> 	if (!new) {
>>>>> 		status = nfserr_jukebox;
>>>>> 		goto out_status;
>>>>> @@ -1130,21 +1124,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>>> 	nfsd_file_lru_remove(nf);
>>>>> 	this_cpu_inc(nfsd_file_cache_hits);
>>>>> 
>>>>> -	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
>>>>> -		bool write = (may_flags & NFSD_MAY_WRITE);
>>>>> -
>>>>> -		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
>>>>> -		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
>>>>> -			status = nfserrno(nfsd_open_break_lease(
>>>>> -					file_inode(nf->nf_file), may_flags));
>>>>> -			if (status == nfs_ok) {
>>>>> -				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
>>>>> -				if (write)
>>>>> -					clear_bit(NFSD_FILE_BREAK_WRITE,
>>>>> -						  &nf->nf_flags);
>>>>> -			}
>>>>> -		}
>>>>> -	}
>>>>> +	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
>>>>> out:
>>>>> 	if (status == nfs_ok) {
>>>>> 		if (open)
>>>>> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
>>>>> index d534b76cb65b..8e8c0c47d67d 100644
>>>>> --- a/fs/nfsd/filecache.h
>>>>> +++ b/fs/nfsd/filecache.h
>>>>> @@ -37,9 +37,7 @@ struct nfsd_file {
>>>>> 	struct net		*nf_net;
>>>>> #define NFSD_FILE_HASHED	(0)
>>>>> #define NFSD_FILE_PENDING	(1)
>>>>> -#define NFSD_FILE_BREAK_READ	(2)
>>>>> -#define NFSD_FILE_BREAK_WRITE	(3)
>>>>> -#define NFSD_FILE_REFERENCED	(4)
>>>>> +#define NFSD_FILE_REFERENCED	(2)
>>>>> 	unsigned long		nf_flags;
>>>>> 	struct inode		*nf_inode;	/* don't deref */
>>>>> 	refcount_t		nf_ref;
>>>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>>>> index e9c5d0f56977..2bd867a96eba 100644
>>>>> --- a/fs/nfsd/trace.h
>>>>> +++ b/fs/nfsd/trace.h
>>>>> @@ -758,8 +758,6 @@ DEFINE_CLID_EVENT(confirmed_r);
>>>>> 	__print_flags(val, "|",						\
>>>>> 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
>>>>> 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
>>>>> -		{ 1 << NFSD_FILE_BREAK_READ,	"BREAK_READ" },		\
>>>>> -		{ 1 << NFSD_FILE_BREAK_WRITE,	"BREAK_WRITE" },	\
>>>>> 		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"})
>>>>> 
>>>>> DECLARE_EVENT_CLASS(nfsd_file_class,
>>>>> -- 
>>>>> 2.37.1
>>>>> 
>>>> 
>>>> --
>>>> Chuck Lever
>>>> 
>>>> 
>>>> 
>>> 
>>> -- 
>>> Jeff Layton <jlayton@kernel.org>
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@kernel.org>

--
Chuck Lever




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags
  2022-07-29 17:46       ` Chuck Lever III
  2022-07-29 17:55         ` Jeff Layton
@ 2022-07-29 18:13         ` Chuck Lever III
  1 sibling, 0 replies; 10+ messages in thread
From: Chuck Lever III @ 2022-07-29 18:13 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Linux NFS Mailing List, Olga Kornievskaia



> On Jul 29, 2022, at 1:46 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
> 
> 
> 
>> On Jul 29, 2022, at 1:34 PM, Jeff Layton <jlayton@kernel.org> wrote:
>> 
>> On Fri, 2022-07-29 at 17:21 +0000, Chuck Lever III wrote:
>>> 
>>>> On Jul 29, 2022, at 12:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
>>>> 
>>>> We had a report from the spring Bake-a-thon of data corruption in some
>>>> nfstest_interop tests. Looking at the traces showed the NFS server
>>>> allowing a v3 WRITE to proceed while a read delegation was still
>>>> outstanding.
>>>> 
>>>> Currently, we only set NFSD_FILE_BREAK_* flags if
>>>> NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
>>>> NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
>>>> COMMIT ops, where we need a writeable filehandle but don't need to
>>>> break read leases.
>>>> 
>>>> It doesn't make any sense to consult that flag when allocating a file
>>>> since the file may be used on subsequent calls where we do want to break
>>>> the lease (and the usage of it here seems to be reverse from what it
>>>> should be anyway).
>>>> 
>>>> Also, after calling nfsd_open_break_lease, we don't want to clear the
>>>> BREAK_* bits. A lease could end up being set on it later (more than
>>>> once) and we need to be able to break those leases as well.
>>>> 
>>>> This means that the NFSD_FILE_BREAK_* flags now just mirror
>>>> NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
>>>> drop those flags and unconditionally call nfsd_open_break_lease every
>>>> time.
>>>> 
>>>> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
>>>> Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
>>>> Reported-by: Olga Kornieskaia <kolga@netapp.com>
>>>> Signed-off-by: Jeff Layton <jlayton@kernel.org>
>>> 
>>> I'm going to go out on a limb and predict this will conflict
>>> heavily with the filecache overhaul patches I have queued for
>>> next. :-)
>>> 
>>> Do you believe this is something that urgently needs to be
>>> backported to stable kernels, or can it be rebased on top of
>>> the filecache overhaul work?
>>> 
>>> 
>> 
>> I based this on top of your for-next branch and I think the filecache is
>> already in there.
>> 
>> It's a pretty nasty bug that we probably will want backported, so it
>> might make sense to respin this on top of mainline and put it in ahead
>> of the filecache overhaul.
> 
> I am a generally a proponent of enabling fix backports.
> 
> I encourage you to test the respin on 5.19 and 5.18 at least
> because the moment that patch hits upstream, Sasha and Greg
> will pull it into stable. I don't relish the idea of having
> to fix the fix, if you catch my drift.
> 
> And perhaps when you repost, the fix should be reordered
> before the patches that add the tracepoints.

Well... let's rebase the fix, but the tracepoint patches can
go in after the filecache overhaul, I think. That should make
it a little easier.


--
Chuck Lever




^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-07-29 18:13 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-29 16:47 [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease Jeff Layton
2022-07-29 16:47 ` [PATCH 2/3] nfsd: print nf pointer in some nfsd_file tracepoints Jeff Layton
2022-07-29 16:47 ` [PATCH 3/3] nfsd: eliminate the NFSD_FILE_BREAK_* flags Jeff Layton
2022-07-29 17:21   ` Chuck Lever III
2022-07-29 17:34     ` Jeff Layton
2022-07-29 17:46       ` Chuck Lever III
2022-07-29 17:55         ` Jeff Layton
2022-07-29 17:57           ` Chuck Lever III
2022-07-29 18:13         ` Chuck Lever III
2022-07-29 17:20 ` [PATCH 1/3] nfsd: add new tracepoint in nfsd_open_break_lease Chuck Lever III

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.