All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] nfs: fix -ENOSPC DIO write regression
@ 2022-07-22 18:12 Jeff Layton
  2022-07-22 18:12 ` [PATCH 1/3] nfs: add new nfs_direct_req tracepoint events Jeff Layton
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Jeff Layton @ 2022-07-22 18:12 UTC (permalink / raw)
  To: trond.myklebust, anna; +Cc: linux-nfs, chuck.lever, bxue

Boyang reported that xfstest generic/476 would never complete when run
against a filesystem that was "too small".

What I found was that we would end up trying to issue a large DIO write
that would come back short. The kernel would then follow up and try to
write out the rest and get back -ENOSPC. It would then try to issue a
commit, which would then try to reissue the writes, and around it would
go.

This patchset seems to fix it. Unfortunately, I'm not positive which
patch _broke_ this as it seems to have happened quite some time ago.

Jeff Layton (3):
  nfs: add new nfs_direct_req tracepoint events
  nfs: always check dreq->error after a commit
  nfs: only issue commit in DIO codepath if we have uncommitted data

 fs/nfs/direct.c         | 50 +++++++++--------------------
 fs/nfs/internal.h       | 33 ++++++++++++++++++++
 fs/nfs/nfstrace.h       | 69 +++++++++++++++++++++++++++++++++++++++++
 fs/nfs/write.c          | 48 +++++++++++++++++-----------
 include/linux/nfs_xdr.h |  1 +
 5 files changed, 148 insertions(+), 53 deletions(-)

-- 
2.36.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] nfs: add new nfs_direct_req tracepoint events
  2022-07-22 18:12 [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Jeff Layton
@ 2022-07-22 18:12 ` Jeff Layton
  2022-07-22 18:12 ` [PATCH 2/3] nfs: always check dreq->error after a commit Jeff Layton
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Jeff Layton @ 2022-07-22 18:12 UTC (permalink / raw)
  To: trond.myklebust, anna; +Cc: linux-nfs, chuck.lever, bxue

Add some new tracepoints to the DIO write code.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfs/direct.c   | 45 +++++++++----------------------
 fs/nfs/internal.h | 33 +++++++++++++++++++++++
 fs/nfs/nfstrace.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 114 insertions(+), 33 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4eb2a8380a28..ad40e81857ee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -60,44 +60,12 @@
 #include "iostat.h"
 #include "pnfs.h"
 #include "fscache.h"
+#include "nfstrace.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
 static struct kmem_cache *nfs_direct_cachep;
 
-struct nfs_direct_req {
-	struct kref		kref;		/* release manager */
-
-	/* I/O parameters */
-	struct nfs_open_context	*ctx;		/* file open context info */
-	struct nfs_lock_context *l_ctx;		/* Lock context info */
-	struct kiocb *		iocb;		/* controlling i/o request */
-	struct inode *		inode;		/* target file of i/o */
-
-	/* completion state */
-	atomic_t		io_count;	/* i/os we're waiting for */
-	spinlock_t		lock;		/* protect completion state */
-
-	loff_t			io_start;	/* Start offset for I/O */
-	ssize_t			count,		/* bytes actually processed */
-				max_count,	/* max expected count */
-				bytes_left,	/* bytes left to be sent */
-				error;		/* any reported error */
-	struct completion	completion;	/* wait for i/o completion */
-
-	/* commit state */
-	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo */
-	struct pnfs_ds_commit_info ds_cinfo;	/* Storage for cinfo */
-	struct work_struct	work;
-	int			flags;
-	/* for write */
-#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
-#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
-	/* for read */
-#define NFS_ODIRECT_SHOULD_DIRTY	(3)	/* dirty user-space page after read */
-#define NFS_ODIRECT_DONE		INT_MAX	/* write verification failed */
-};
-
 static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
@@ -595,6 +563,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 	struct nfs_page *req;
 	int status = data->task.tk_status;
 
+	trace_nfs_direct_commit_complete(dreq);
+
 	if (status < 0) {
 		/* Errors in commit are fatal */
 		dreq->error = status;
@@ -631,6 +601,8 @@ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
 {
 	struct nfs_direct_req *dreq = cinfo->dreq;
 
+	trace_nfs_direct_resched_write(dreq);
+
 	spin_lock(&dreq->lock);
 	if (dreq->flags != NFS_ODIRECT_DONE)
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
@@ -695,6 +667,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
 {
+	trace_nfs_direct_write_complete(dreq);
 	queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
 }
 
@@ -705,6 +678,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 	struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 	int flags = NFS_ODIRECT_DONE;
 
+	trace_nfs_direct_write_completion(dreq);
+
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 
 	spin_lock(&dreq->lock);
@@ -759,6 +734,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
 
+	trace_nfs_direct_write_reschedule_io(dreq);
+
 	spin_lock(&dreq->lock);
 	if (dreq->error == 0) {
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
@@ -799,6 +776,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
+	trace_nfs_direct_write_schedule_iovec(dreq);
+
 	nfs_pageio_init_write(&desc, inode, ioflags, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8f8cd6e2d4db..24a48ba1ca7e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -859,3 +859,36 @@ static inline void nfs_set_port(struct sockaddr *sap, int *port,
 
 	rpc_set_port(sap, *port);
 }
+
+struct nfs_direct_req {
+	struct kref		kref;		/* release manager */
+
+	/* I/O parameters */
+	struct nfs_open_context	*ctx;		/* file open context info */
+	struct nfs_lock_context *l_ctx;		/* Lock context info */
+	struct kiocb *		iocb;		/* controlling i/o request */
+	struct inode *		inode;		/* target file of i/o */
+
+	/* completion state */
+	atomic_t		io_count;	/* i/os we're waiting for */
+	spinlock_t		lock;		/* protect completion state */
+
+	loff_t			io_start;	/* Start offset for I/O */
+	ssize_t			count,		/* bytes actually processed */
+				max_count,	/* max expected count */
+				bytes_left,	/* bytes left to be sent */
+				error;		/* any reported error */
+	struct completion	completion;	/* wait for i/o completion */
+
+	/* commit state */
+	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo */
+	struct pnfs_ds_commit_info ds_cinfo;	/* Storage for cinfo */
+	struct work_struct	work;
+	int			flags;
+	/* for write */
+#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
+	/* for read */
+#define NFS_ODIRECT_SHOULD_DIRTY	(3)	/* dirty user-space page after read */
+#define NFS_ODIRECT_DONE		INT_MAX	/* write verification failed */
+};
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 012bd7339862..65388e4a0cd7 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -1576,6 +1576,75 @@ TRACE_EVENT(nfs_commit_done,
 		)
 );
 
+#define nfs_show_direct_req_flags(v) \
+	__print_flags(v, "|", \
+			{ NFS_ODIRECT_DO_COMMIT, "DO_COMMIT" }, \
+			{ NFS_ODIRECT_RESCHED_WRITES, "RESCHED_WRITES" }, \
+			{ NFS_ODIRECT_SHOULD_DIRTY, "SHOULD DIRTY" }, \
+			{ NFS_ODIRECT_DONE, "DONE" } )
+
+DECLARE_EVENT_CLASS(nfs_direct_req_class,
+		TP_PROTO(
+			const struct nfs_direct_req *dreq
+		),
+
+		TP_ARGS(dreq),
+
+		TP_STRUCT__entry(
+			__field(const struct nfs_direct_req *, dreq)
+			__field(dev_t, dev)
+			__field(u64, fileid)
+			__field(u32, fhandle)
+			__field(int, ref)
+			__field(loff_t, io_start)
+			__field(ssize_t, count)
+			__field(ssize_t, bytes_left)
+			__field(ssize_t, error)
+			__field(int, flags)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = dreq->inode;
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			const struct nfs_fh *fh = &nfsi->fh;
+
+			__entry->dreq = dreq;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(fh);
+			__entry->ref = kref_read(&dreq->kref);
+			__entry->io_start = dreq->io_start;
+			__entry->count = dreq->count;
+			__entry->bytes_left = dreq->bytes_left;
+			__entry->error = dreq->error;
+			__entry->flags = dreq->flags;
+		),
+
+		TP_printk(
+			"dreq=%p fileid=%02x:%02x:%llu fhandle=0x%08x ref=%d "
+			"io_start=%lld count=%zd bytes_left=%zd error=%zd flags=%s",
+			__entry->dreq, MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle, __entry->ref,
+			__entry->io_start, __entry->count, __entry->bytes_left,
+			__entry->error, nfs_show_direct_req_flags(__entry->flags)
+		)
+);
+
+#define DEFINE_NFS_DIRECT_REQ_EVENT(name) \
+	DEFINE_EVENT(nfs_direct_req_class, name, \
+			TP_PROTO( \
+				const struct nfs_direct_req *dreq \
+			), \
+			TP_ARGS(dreq))
+
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_commit_complete);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_resched_write);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_complete);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io);
+
 TRACE_EVENT(nfs_fh_to_dentry,
 		TP_PROTO(
 			const struct super_block *sb,
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] nfs: always check dreq->error after a commit
  2022-07-22 18:12 [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Jeff Layton
  2022-07-22 18:12 ` [PATCH 1/3] nfs: add new nfs_direct_req tracepoint events Jeff Layton
@ 2022-07-22 18:12 ` Jeff Layton
  2022-07-22 18:12 ` [PATCH 3/3] nfs: only issue commit in DIO codepath if we have uncommitted data Jeff Layton
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Jeff Layton @ 2022-07-22 18:12 UTC (permalink / raw)
  To: trond.myklebust, anna; +Cc: linux-nfs, chuck.lever, bxue

When the client gets back a short DIO write, it will then attempt to
issue another write to finish the DIO request. If that write then fails
(as is often the case in an -ENOSPC situation), then we still may need
to issue a COMMIT if the earlier short write was unstable. If that COMMIT
then succeeds, then we don't want the client to reschedule the write
requests, and to instead just return a short write. Otherwise, we can
end up looping over the same DIO write forever.

Always consult dreq->error after a successful RPC, even when the flag
state is not NFS_ODIRECT_DONE.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2028370
Reported-by: Boyang Xue <bxue@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfs/direct.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ad40e81857ee..a47d13296194 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -571,8 +571,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 		dreq->max_count = 0;
 		dreq->count = 0;
 		dreq->flags = NFS_ODIRECT_DONE;
-	} else if (dreq->flags == NFS_ODIRECT_DONE)
+	} else {
 		status = dreq->error;
+	}
 
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] nfs: only issue commit in DIO codepath if we have uncommitted data
  2022-07-22 18:12 [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Jeff Layton
  2022-07-22 18:12 ` [PATCH 1/3] nfs: add new nfs_direct_req tracepoint events Jeff Layton
  2022-07-22 18:12 ` [PATCH 2/3] nfs: always check dreq->error after a commit Jeff Layton
@ 2022-07-22 18:12 ` Jeff Layton
  2022-07-24 19:10 ` [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Trond Myklebust
  2022-08-04  8:54 ` Boyang Xue
  4 siblings, 0 replies; 7+ messages in thread
From: Jeff Layton @ 2022-07-22 18:12 UTC (permalink / raw)
  To: trond.myklebust, anna; +Cc: linux-nfs, chuck.lever, bxue

Currently, we try to determine whether to issue a commit based on
nfs_write_need_commit which looks at the current verifier. In the case
where we got a short write and then tried to follow it up with one that
failed, the verifier can't be trusted.

What we really want to know is whether the pgio request had any
successful writes that came back as UNSTABLE. Add a new flag to the pgio
request, and use that to indicate that we've had a successful unstable
write. Only issue a commit if that flag is set.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfs/direct.c         |  2 +-
 fs/nfs/write.c          | 48 +++++++++++++++++++++++++----------------
 include/linux/nfs_xdr.h |  1 +
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index a47d13296194..86df66bb14c5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -690,7 +690,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 	}
 
 	nfs_direct_count_bytes(dreq, hdr);
-	if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) {
+	if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) {
 		if (!dreq->flags)
 			dreq->flags = NFS_ODIRECT_DO_COMMIT;
 		flags = dreq->flags;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1c706465d090..16d166bc4099 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1576,25 +1576,37 @@ static int nfs_writeback_done(struct rpc_task *task,
 	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
 	trace_nfs_writeback_done(task, hdr);
 
-	if (hdr->res.verf->committed < hdr->args.stable &&
-	    task->tk_status >= 0) {
-		/* We tried a write call, but the server did not
-		 * commit data to stable storage even though we
-		 * requested it.
-		 * Note: There is a known bug in Tru64 < 5.0 in which
-		 *	 the server reports NFS_DATA_SYNC, but performs
-		 *	 NFS_FILE_SYNC. We therefore implement this checking
-		 *	 as a dprintk() in order to avoid filling syslog.
-		 */
-		static unsigned long    complain;
+	if (task->tk_status >= 0) {
+		enum nfs3_stable_how committed = hdr->res.verf->committed;
+
+		if (committed == NFS_UNSTABLE) {
+			/*
+			 * We have some uncommitted data on the server at
+			 * this point, so ensure that we keep track of that
+			 * fact irrespective of what later writes do.
+			 */
+			set_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags);
+		}
 
-		/* Note this will print the MDS for a DS write */
-		if (time_before(complain, jiffies)) {
-			dprintk("NFS:       faulty NFS server %s:"
-				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(inode)->nfs_client->cl_hostname,
-				hdr->res.verf->committed, hdr->args.stable);
-			complain = jiffies + 300 * HZ;
+		if (committed < hdr->args.stable) {
+			/* We tried a write call, but the server did not
+			 * commit data to stable storage even though we
+			 * requested it.
+			 * Note: There is a known bug in Tru64 < 5.0 in which
+			 *	 the server reports NFS_DATA_SYNC, but performs
+			 *	 NFS_FILE_SYNC. We therefore implement this checking
+			 *	 as a dprintk() in order to avoid filling syslog.
+			 */
+			static unsigned long    complain;
+
+			/* Note this will print the MDS for a DS write */
+			if (time_before(complain, jiffies)) {
+				dprintk("NFS:       faulty NFS server %s:"
+					" (committed = %d) != (stable = %d)\n",
+					NFS_SERVER(inode)->nfs_client->cl_hostname,
+					committed, hdr->args.stable);
+				complain = jiffies + 300 * HZ;
+			}
 		}
 	}
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0e3aa0f5f324..e86cf6642d21 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1600,6 +1600,7 @@ enum {
 	NFS_IOHDR_STAT,
 	NFS_IOHDR_RESEND_PNFS,
 	NFS_IOHDR_RESEND_MDS,
+	NFS_IOHDR_UNSTABLE_WRITES,
 };
 
 struct nfs_io_completion;
-- 
2.36.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/3] nfs: fix -ENOSPC DIO write regression
  2022-07-22 18:12 [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Jeff Layton
                   ` (2 preceding siblings ...)
  2022-07-22 18:12 ` [PATCH 3/3] nfs: only issue commit in DIO codepath if we have uncommitted data Jeff Layton
@ 2022-07-24 19:10 ` Trond Myklebust
  2022-07-24 20:18   ` Trond Myklebust
  2022-08-04  8:54 ` Boyang Xue
  4 siblings, 1 reply; 7+ messages in thread
From: Trond Myklebust @ 2022-07-24 19:10 UTC (permalink / raw)
  To: anna, jlayton; +Cc: linux-nfs, chuck.lever, bxue

On Fri, 2022-07-22 at 14:12 -0400, Jeff Layton wrote:
> Boyang reported that xfstest generic/476 would never complete when
> run
> against a filesystem that was "too small".
> 
> What I found was that we would end up trying to issue a large DIO
> write
> that would come back short. The kernel would then follow up and try
> to
> write out the rest and get back -ENOSPC. It would then try to issue a
> commit, which would then try to reissue the writes, and around it
> would
> go.
> 
> This patchset seems to fix it. Unfortunately, I'm not positive which
> patch _broke_ this as it seems to have happened quite some time ago.
> 
> Jeff Layton (3):
>   nfs: add new nfs_direct_req tracepoint events
>   nfs: always check dreq->error after a commit
>   nfs: only issue commit in DIO codepath if we have uncommitted data
> 
>  fs/nfs/direct.c         | 50 +++++++++--------------------
>  fs/nfs/internal.h       | 33 ++++++++++++++++++++
>  fs/nfs/nfstrace.h       | 69
> +++++++++++++++++++++++++++++++++++++++++
>  fs/nfs/write.c          | 48 +++++++++++++++++-----------
>  include/linux/nfs_xdr.h |  1 +
>  5 files changed, 148 insertions(+), 53 deletions(-)
> 

With this series applied, I'm seeing things like xfstests generic/013
looping forever.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/3] nfs: fix -ENOSPC DIO write regression
  2022-07-24 19:10 ` [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Trond Myklebust
@ 2022-07-24 20:18   ` Trond Myklebust
  0 siblings, 0 replies; 7+ messages in thread
From: Trond Myklebust @ 2022-07-24 20:18 UTC (permalink / raw)
  To: anna, jlayton; +Cc: linux-nfs, chuck.lever, bxue

On Sun, 2022-07-24 at 19:10 +0000, Trond Myklebust wrote:
> On Fri, 2022-07-22 at 14:12 -0400, Jeff Layton wrote:
> > Boyang reported that xfstest generic/476 would never complete when
> > run
> > against a filesystem that was "too small".
> > 
> > What I found was that we would end up trying to issue a large DIO
> > write
> > that would come back short. The kernel would then follow up and try
> > to
> > write out the rest and get back -ENOSPC. It would then try to issue
> > a
> > commit, which would then try to reissue the writes, and around it
> > would
> > go.
> > 
> > This patchset seems to fix it. Unfortunately, I'm not positive
> > which
> > patch _broke_ this as it seems to have happened quite some time
> > ago.
> > 
> > Jeff Layton (3):
> >   nfs: add new nfs_direct_req tracepoint events
> >   nfs: always check dreq->error after a commit
> >   nfs: only issue commit in DIO codepath if we have uncommitted
> > data
> > 
> >  fs/nfs/direct.c         | 50 +++++++++--------------------
> >  fs/nfs/internal.h       | 33 ++++++++++++++++++++
> >  fs/nfs/nfstrace.h       | 69
> > +++++++++++++++++++++++++++++++++++++++++
> >  fs/nfs/write.c          | 48 +++++++++++++++++-----------
> >  include/linux/nfs_xdr.h |  1 +
> >  5 files changed, 148 insertions(+), 53 deletions(-)
> > 
> 
> With this series applied, I'm seeing things like xfstests generic/013
> looping forever.
> 

Sorry, false alarm... That turned out to be due to an interesting
readahead config issue.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/3] nfs: fix -ENOSPC DIO write regression
  2022-07-22 18:12 [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Jeff Layton
                   ` (3 preceding siblings ...)
  2022-07-24 19:10 ` [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Trond Myklebust
@ 2022-08-04  8:54 ` Boyang Xue
  4 siblings, 0 replies; 7+ messages in thread
From: Boyang Xue @ 2022-08-04  8:54 UTC (permalink / raw)
  To: Jeff Layton; +Cc: trond.myklebust, anna, linux-nfs, chuck.lever

Hi Jeff,

Thanks for fixing this! I have run some tests against this patchset
for days, and the results are all good. generic/476 would complete
within 30 mins typically. These tests are:

For verifying generic/476:
xfstests-multihost-nfsv3-over-ext4
xfstests-multihost-nfsv3-over-feature-ext4
xfstests-multihost-nfsv3-over-feature-xfs
xfstests-multihost-nfsv3-over-xfs
xfstests-multihost-nfsv4.0-over-ext4
xfstests-multihost-nfsv4.0-over-feature-ext4
xfstests-multihost-nfsv4.0-over-feature-xfs
xfstests-multihost-nfsv4.0-over-xfs
xfstests-multihost-nfsv4.1-over-ext4
xfstests-multihost-nfsv4.1-over-feature-ext4
xfstests-multihost-nfsv4.1-over-feature-xfs
xfstests-multihost-nfsv4.1-over-xfs
xfstests-multihost-nfsv4.2-over-ext4
xfstests-multihost-nfsv4.2-over-feature-ext4
xfstests-multihost-nfsv4.2-over-feature-xfs
xfstests-multihost-nfsv4.2-over-xfs
xfstests-localhost-nfsv3
xfstests-localhost-nfsv4.0
xfstests-localhost-nfsv4.1
xfstests-localhost-nfsv4.2

Regression tests:
ltp-nfsv{3,4.0,4.1,4.2}
pjd-test: nfs
nfs-connectathon
nfs-sanity-check

All tests were run on x86_64, aarch64, ppc64le, and s390x (part, due
to some config issues).

Hope this helps.

Thanks,
Boyang

On Sat, Jul 23, 2022 at 2:12 AM Jeff Layton <jlayton@kernel.org> wrote:
>
> Boyang reported that xfstest generic/476 would never complete when run
> against a filesystem that was "too small".
>
> What I found was that we would end up trying to issue a large DIO write
> that would come back short. The kernel would then follow up and try to
> write out the rest and get back -ENOSPC. It would then try to issue a
> commit, which would then try to reissue the writes, and around it would
> go.
>
> This patchset seems to fix it. Unfortunately, I'm not positive which
> patch _broke_ this as it seems to have happened quite some time ago.
>
> Jeff Layton (3):
>   nfs: add new nfs_direct_req tracepoint events
>   nfs: always check dreq->error after a commit
>   nfs: only issue commit in DIO codepath if we have uncommitted data
>
>  fs/nfs/direct.c         | 50 +++++++++--------------------
>  fs/nfs/internal.h       | 33 ++++++++++++++++++++
>  fs/nfs/nfstrace.h       | 69 +++++++++++++++++++++++++++++++++++++++++
>  fs/nfs/write.c          | 48 +++++++++++++++++-----------
>  include/linux/nfs_xdr.h |  1 +
>  5 files changed, 148 insertions(+), 53 deletions(-)
>
> --
> 2.36.1
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-08-04  8:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-22 18:12 [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Jeff Layton
2022-07-22 18:12 ` [PATCH 1/3] nfs: add new nfs_direct_req tracepoint events Jeff Layton
2022-07-22 18:12 ` [PATCH 2/3] nfs: always check dreq->error after a commit Jeff Layton
2022-07-22 18:12 ` [PATCH 3/3] nfs: only issue commit in DIO codepath if we have uncommitted data Jeff Layton
2022-07-24 19:10 ` [PATCH 0/3] nfs: fix -ENOSPC DIO write regression Trond Myklebust
2022-07-24 20:18   ` Trond Myklebust
2022-08-04  8:54 ` Boyang Xue

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.