linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/5] Fix O_DIRECT writeback error paths
@ 2023-09-04 16:34 trondmy
  2023-09-04 16:34 ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling trondmy
  0 siblings, 1 reply; 12+ messages in thread
From: trondmy @ 2023-09-04 16:34 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

In recent tests of the O_DIRECT code, a number of bugs were discovered
in the error codepaths, some of which were shown to result in data
corruption. The tests involved using pNFS flexfiles with 3-way
mirroring, and then shutting down one of the data servers. When the data
was later re-read from the server, it was shown to not match the
original data.

---
v1 - NFS: Fix error handling for O_DIRECT write scheduling
v2 - Fixes for incorrect commit info
     Fixes for O_DIRECT accounting
     O_DIRECT locking issues
     Fix write scheduling issues

Trond Myklebust (5):
  NFS: Fix error handling for O_DIRECT write scheduling
  NFS: Fix O_DIRECT locking issues
  NFS: More O_DIRECT accounting fixes for error paths
  NFS: Use the correct commit info in nfs_join_page_group()
  NFS: More fixes for nfs_direct_write_reschedule_io()

 fs/nfs/direct.c          | 134 +++++++++++++++++++++++++++------------
 fs/nfs/write.c           |  23 +++----
 include/linux/nfs_page.h |   4 +-
 3 files changed, 108 insertions(+), 53 deletions(-)

-- 
2.41.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling
  2023-09-04 16:34 [PATCH v2 0/5] Fix O_DIRECT writeback error paths trondmy
@ 2023-09-04 16:34 ` trondmy
  2023-09-04 16:34   ` [PATCH v2 2/5] NFS: Fix O_DIRECT locking issues trondmy
  2023-11-09 15:05   ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling Benjamin Coddington
  0 siblings, 2 replies; 12+ messages in thread
From: trondmy @ 2023-09-04 16:34 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If we fail to schedule a request for transmission, there are 2
possibilities:
1) Either we hit a fatal error, and we just want to drop the remaining
   requests on the floor.
2) We were asked to try again, in which case we should allow the
   outstanding RPC calls to complete, so that we can recoalesce requests
   and try again.

Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c | 62 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 47d892a1d363..ee88f0a6e7b8 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -528,10 +528,9 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_pageio_descriptor desc;
-	struct nfs_page *req, *tmp;
+	struct nfs_page *req;
 	LIST_HEAD(reqs);
 	struct nfs_commit_info cinfo;
-	LIST_HEAD(failed);
 
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
@@ -549,27 +548,36 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 
-	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+	while (!list_empty(&reqs)) {
+		req = nfs_list_entry(reqs.next);
 		/* Bump the transmission count */
 		req->wb_nio++;
 		if (!nfs_pageio_add_request(&desc, req)) {
-			nfs_list_move_request(req, &failed);
 			spin_lock(&cinfo.inode->i_lock);
-			dreq->flags = 0;
-			if (desc.pg_error < 0)
+			if (dreq->error < 0) {
+				desc.pg_error = dreq->error;
+			} else if (desc.pg_error != -EAGAIN) {
+				dreq->flags = 0;
+				if (!desc.pg_error)
+					desc.pg_error = -EIO;
 				dreq->error = desc.pg_error;
-			else
-				dreq->error = -EIO;
+			} else
+				dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 			spin_unlock(&cinfo.inode->i_lock);
+			break;
 		}
 		nfs_release_request(req);
 	}
 	nfs_pageio_complete(&desc);
 
-	while (!list_empty(&failed)) {
-		req = nfs_list_entry(failed.next);
+	while (!list_empty(&reqs)) {
+		req = nfs_list_entry(reqs.next);
 		nfs_list_remove_request(req);
 		nfs_unlock_and_release_request(req);
+		if (desc.pg_error == -EAGAIN)
+			nfs_mark_request_commit(req, NULL, &cinfo, 0);
+		else
+			nfs_release_request(req);
 	}
 
 	if (put_dreq(dreq))
@@ -794,9 +802,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
+	struct nfs_commit_info cinfo;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
 	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
+	bool defer = false;
 
 	trace_nfs_direct_write_schedule_iovec(dreq);
 
@@ -837,17 +847,37 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 				break;
 			}
 
-			nfs_lock_request(req);
-			if (!nfs_pageio_add_request(&desc, req)) {
-				result = desc.pg_error;
-				nfs_unlock_and_release_request(req);
-				break;
-			}
 			pgbase = 0;
 			bytes -= req_len;
 			requested_bytes += req_len;
 			pos += req_len;
 			dreq->bytes_left -= req_len;
+
+			if (defer) {
+				nfs_mark_request_commit(req, NULL, &cinfo, 0);
+				continue;
+			}
+
+			nfs_lock_request(req);
+			if (nfs_pageio_add_request(&desc, req))
+				continue;
+
+			/* Exit on hard errors */
+			if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) {
+				result = desc.pg_error;
+				nfs_unlock_and_release_request(req);
+				break;
+			}
+
+			/* If the error is soft, defer remaining requests */
+			nfs_init_cinfo_from_dreq(&cinfo, dreq);
+			spin_lock(&cinfo.inode->i_lock);
+			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+			spin_unlock(&cinfo.inode->i_lock);
+			nfs_unlock_request(req);
+			nfs_mark_request_commit(req, NULL, &cinfo, 0);
+			desc.pg_error = 0;
+			defer = true;
 		}
 		nfs_direct_release_pages(pagevec, npages);
 		kvfree(pagevec);
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 2/5] NFS: Fix O_DIRECT locking issues
  2023-09-04 16:34 ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling trondmy
@ 2023-09-04 16:34   ` trondmy
  2023-09-04 16:34     ` [PATCH v2 3/5] NFS: More O_DIRECT accounting fixes for error paths trondmy
  2023-11-09 15:05   ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling Benjamin Coddington
  1 sibling, 1 reply; 12+ messages in thread
From: trondmy @ 2023-09-04 16:34 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The dreq fields are protected by the dreq->lock.

Fixes: 0703dc52ef0b ("NFS: Fix error handling for O_DIRECT write scheduling")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ee88f0a6e7b8..e8a1645857dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -553,7 +553,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		/* Bump the transmission count */
 		req->wb_nio++;
 		if (!nfs_pageio_add_request(&desc, req)) {
-			spin_lock(&cinfo.inode->i_lock);
+			spin_lock(&dreq->lock);
 			if (dreq->error < 0) {
 				desc.pg_error = dreq->error;
 			} else if (desc.pg_error != -EAGAIN) {
@@ -563,7 +563,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 				dreq->error = desc.pg_error;
 			} else
 				dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-			spin_unlock(&cinfo.inode->i_lock);
+			spin_unlock(&dreq->lock);
 			break;
 		}
 		nfs_release_request(req);
@@ -871,9 +871,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 
 			/* If the error is soft, defer remaining requests */
 			nfs_init_cinfo_from_dreq(&cinfo, dreq);
-			spin_lock(&cinfo.inode->i_lock);
+			spin_lock(&dreq->lock);
 			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-			spin_unlock(&cinfo.inode->i_lock);
+			spin_unlock(&dreq->lock);
 			nfs_unlock_request(req);
 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
 			desc.pg_error = 0;
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 3/5] NFS: More O_DIRECT accounting fixes for error paths
  2023-09-04 16:34   ` [PATCH v2 2/5] NFS: Fix O_DIRECT locking issues trondmy
@ 2023-09-04 16:34     ` trondmy
  2023-09-04 16:34       ` [PATCH v2 4/5] NFS: Use the correct commit info in nfs_join_page_group() trondmy
  0 siblings, 1 reply; 12+ messages in thread
From: trondmy @ 2023-09-04 16:34 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If we hit a fatal error when retransmitting, we do need to record the
removal of the request from the count of written bytes.

Fixes: 031d73ed768a ("NFS: Fix O_DIRECT accounting of number of bytes read/written")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e8a1645857dd..a53e50123499 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -93,12 +93,10 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
 		dreq->max_count = dreq_len;
 		if (dreq->count > dreq_len)
 			dreq->count = dreq_len;
-
-		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
-			dreq->error = hdr->error;
-		else /* Clear outstanding error if this is EOF */
-			dreq->error = 0;
 	}
+
+	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error)
+		dreq->error = hdr->error;
 }
 
 static void
@@ -120,6 +118,18 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq,
 		dreq->count = dreq_len;
 }
 
+static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
+					struct nfs_page *req)
+{
+	loff_t offs = req_offset(req);
+	size_t req_start = (size_t)(offs - dreq->io_start);
+
+	if (req_start < dreq->max_count)
+		dreq->max_count = req_start;
+	if (req_start < dreq->count)
+		dreq->count = req_start;
+}
+
 /**
  * nfs_swap_rw - NFS address space operation for swap I/O
  * @iocb: target I/O control block
@@ -537,10 +547,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
 	nfs_direct_join_group(&reqs, dreq->inode);
 
-	dreq->count = 0;
-	dreq->max_count = 0;
-	list_for_each_entry(req, &reqs, wb_list)
-		dreq->max_count += req->wb_bytes;
 	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
 	get_dreq(dreq);
 
@@ -574,10 +580,14 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		req = nfs_list_entry(reqs.next);
 		nfs_list_remove_request(req);
 		nfs_unlock_and_release_request(req);
-		if (desc.pg_error == -EAGAIN)
+		if (desc.pg_error == -EAGAIN) {
 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
-		else
+		} else {
+			spin_lock(&dreq->lock);
+			nfs_direct_truncate_request(dreq, req);
+			spin_unlock(&dreq->lock);
 			nfs_release_request(req);
+		}
 	}
 
 	if (put_dreq(dreq))
@@ -597,8 +607,6 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 	if (status < 0) {
 		/* Errors in commit are fatal */
 		dreq->error = status;
-		dreq->max_count = 0;
-		dreq->count = 0;
 		dreq->flags = NFS_ODIRECT_DONE;
 	} else {
 		status = dreq->error;
@@ -609,7 +617,12 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
-		if (status >= 0 && !nfs_write_match_verf(verf, req)) {
+		if (status < 0) {
+			spin_lock(&dreq->lock);
+			nfs_direct_truncate_request(dreq, req);
+			spin_unlock(&dreq->lock);
+			nfs_release_request(req);
+		} else if (!nfs_write_match_verf(verf, req)) {
 			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 			/*
 			 * Despite the reboot, the write was successful,
@@ -617,7 +630,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 			 */
 			req->wb_nio = 0;
 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
-		} else /* Error or match */
+		} else
 			nfs_release_request(req);
 		nfs_unlock_and_release_request(req);
 	}
@@ -670,6 +683,7 @@ static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
 	while (!list_empty(&reqs)) {
 		req = nfs_list_entry(reqs.next);
 		nfs_list_remove_request(req);
+		nfs_direct_truncate_request(dreq, req);
 		nfs_release_request(req);
 		nfs_unlock_and_release_request(req);
 	}
@@ -719,7 +733,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 	}
 
 	nfs_direct_count_bytes(dreq, hdr);
-	if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) {
+	if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) &&
+	    !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
 		if (!dreq->flags)
 			dreq->flags = NFS_ODIRECT_DO_COMMIT;
 		flags = dreq->flags;
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 4/5] NFS: Use the correct commit info in nfs_join_page_group()
  2023-09-04 16:34     ` [PATCH v2 3/5] NFS: More O_DIRECT accounting fixes for error paths trondmy
@ 2023-09-04 16:34       ` trondmy
  2023-09-04 16:34         ` [PATCH v2 5/5] NFS: More fixes for nfs_direct_write_reschedule_io() trondmy
  0 siblings, 1 reply; 12+ messages in thread
From: trondmy @ 2023-09-04 16:34 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Ensure that nfs_clear_request_commit() updates the correct counters when
it removes them from the commit list.

Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c          |  8 +++++---
 fs/nfs/write.c           | 23 ++++++++++++-----------
 include/linux/nfs_page.h |  4 +++-
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index a53e50123499..3391c8b97da5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -498,7 +498,9 @@ static void nfs_direct_add_page_head(struct list_head *list,
 	kref_get(&head->wb_kref);
 }
 
-static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
+static void nfs_direct_join_group(struct list_head *list,
+				  struct nfs_commit_info *cinfo,
+				  struct inode *inode)
 {
 	struct nfs_page *req, *subreq;
 
@@ -520,7 +522,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
 				nfs_release_request(subreq);
 			}
 		} while ((subreq = subreq->wb_this_page) != req);
-		nfs_join_page_group(req, inode);
+		nfs_join_page_group(req, cinfo, inode);
 	}
 }
 
@@ -545,7 +547,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
 
-	nfs_direct_join_group(&reqs, dreq->inode);
+	nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
 
 	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
 	get_dreq(dreq);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f4cca8f00c0c..8c1ee1a1a28f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -59,7 +59,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
 static const struct nfs_rw_ops nfs_rw_write_ops;
 static void nfs_inode_remove_request(struct nfs_page *req);
-static void nfs_clear_request_commit(struct nfs_page *req);
+static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
+				     struct nfs_page *req);
 static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
 				      struct inode *inode);
 static struct nfs_page *
@@ -502,8 +503,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
  * the (former) group.  All subrequests are removed from any write or commit
  * lists, unlinked from the group and destroyed.
  */
-void
-nfs_join_page_group(struct nfs_page *head, struct inode *inode)
+void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
+			 struct inode *inode)
 {
 	struct nfs_page *subreq;
 	struct nfs_page *destroy_list = NULL;
@@ -533,7 +534,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
 	 * Commit list removal accounting is done after locks are dropped */
 	subreq = head;
 	do {
-		nfs_clear_request_commit(subreq);
+		nfs_clear_request_commit(cinfo, subreq);
 		subreq = subreq->wb_this_page;
 	} while (subreq != head);
 
@@ -566,8 +567,10 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
 {
 	struct inode *inode = folio_file_mapping(folio)->host;
 	struct nfs_page *head;
+	struct nfs_commit_info cinfo;
 	int ret;
 
+	nfs_init_cinfo_from_inode(&cinfo, inode);
 	/*
 	 * A reference is taken only on the head request which acts as a
 	 * reference to the whole page group - the group will not be destroyed
@@ -584,7 +587,7 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
 		return ERR_PTR(ret);
 	}
 
-	nfs_join_page_group(head, inode);
+	nfs_join_page_group(head, &cinfo, inode);
 
 	return head;
 }
@@ -955,18 +958,16 @@ static void nfs_folio_clear_commit(struct folio *folio)
 }
 
 /* Called holding the request lock on @req */
-static void
-nfs_clear_request_commit(struct nfs_page *req)
+static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
+				     struct nfs_page *req)
 {
 	if (test_bit(PG_CLEAN, &req->wb_flags)) {
 		struct nfs_open_context *ctx = nfs_req_openctx(req);
 		struct inode *inode = d_inode(ctx->dentry);
-		struct nfs_commit_info cinfo;
 
-		nfs_init_cinfo_from_inode(&cinfo, inode);
 		mutex_lock(&NFS_I(inode)->commit_mutex);
-		if (!pnfs_clear_request_commit(req, &cinfo)) {
-			nfs_request_remove_commit_list(req, &cinfo);
+		if (!pnfs_clear_request_commit(req, cinfo)) {
+			nfs_request_remove_commit_list(req, cinfo);
 		}
 		mutex_unlock(&NFS_I(inode)->commit_mutex);
 		nfs_folio_clear_commit(nfs_page_to_folio(req));
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index aa9f4c6ebe26..1c315f854ea8 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -157,7 +157,9 @@ extern	void nfs_unlock_request(struct nfs_page *req);
 extern	void nfs_unlock_and_release_request(struct nfs_page *);
 extern	struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
 extern	int nfs_page_group_lock_subrequests(struct nfs_page *head);
-extern	void nfs_join_page_group(struct nfs_page *head, struct inode *inode);
+extern void nfs_join_page_group(struct nfs_page *head,
+				struct nfs_commit_info *cinfo,
+				struct inode *inode);
 extern int nfs_page_group_lock(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 5/5] NFS: More fixes for nfs_direct_write_reschedule_io()
  2023-09-04 16:34       ` [PATCH v2 4/5] NFS: Use the correct commit info in nfs_join_page_group() trondmy
@ 2023-09-04 16:34         ` trondmy
  0 siblings, 0 replies; 12+ messages in thread
From: trondmy @ 2023-09-04 16:34 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Ensure that all requests are put back onto the commit list so that they
can be rescheduled.

Fixes: 4daaeba93822 ("NFS: Fix nfs_direct_write_reschedule_io()")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3391c8b97da5..f6c74f424691 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -780,18 +780,23 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error)
 static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
+	struct nfs_page *req;
+	struct nfs_commit_info cinfo;
 
 	trace_nfs_direct_write_reschedule_io(dreq);
 
+	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 	spin_lock(&dreq->lock);
-	if (dreq->error == 0) {
+	if (dreq->error == 0)
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-		/* fake unstable write to let common nfs resend pages */
-		hdr->verf.committed = NFS_UNSTABLE;
-		hdr->good_bytes = hdr->args.offset + hdr->args.count -
-			hdr->io_start;
-	}
+	set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	spin_unlock(&dreq->lock);
+	while (!list_empty(&hdr->pages)) {
+		req = nfs_list_entry(hdr->pages.next);
+		nfs_list_remove_request(req);
+		nfs_unlock_request(req);
+		nfs_mark_request_commit(req, NULL, &cinfo, 0);
+	}
 }
 
 static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling
  2023-09-04 16:34 ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling trondmy
  2023-09-04 16:34   ` [PATCH v2 2/5] NFS: Fix O_DIRECT locking issues trondmy
@ 2023-11-09 15:05   ` Benjamin Coddington
  2023-11-09 16:53     ` Trond Myklebust
  1 sibling, 1 reply; 12+ messages in thread
From: Benjamin Coddington @ 2023-11-09 15:05 UTC (permalink / raw)
  To: trondmy; +Cc: Anna Schumaker, linux-nfs

Hi Trond,

On 4 Sep 2023, at 12:34, trondmy@kernel.org wrote:

> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> If we fail to schedule a request for transmission, there are 2
> possibilities:
> 1) Either we hit a fatal error, and we just want to drop the remaining
>    requests on the floor.
> 2) We were asked to try again, in which case we should allow the
>    outstanding RPC calls to complete, so that we can recoalesce requests
>    and try again.
>
> Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application")
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/direct.c | 62 ++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 46 insertions(+), 16 deletions(-)
>
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index 47d892a1d363..ee88f0a6e7b8 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -528,10 +528,9 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
>  static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
>  {
>  	struct nfs_pageio_descriptor desc;
> -	struct nfs_page *req, *tmp;
> +	struct nfs_page *req;
>  	LIST_HEAD(reqs);
>  	struct nfs_commit_info cinfo;
> -	LIST_HEAD(failed);
>
>  	nfs_init_cinfo_from_dreq(&cinfo, dreq);
>  	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
> @@ -549,27 +548,36 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
>  			      &nfs_direct_write_completion_ops);
>  	desc.pg_dreq = dreq;
>
> -	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
> +	while (!list_empty(&reqs)) {
> +		req = nfs_list_entry(reqs.next);
>  		/* Bump the transmission count */
>  		req->wb_nio++;
>  		if (!nfs_pageio_add_request(&desc, req)) {
> -			nfs_list_move_request(req, &failed);
>  			spin_lock(&cinfo.inode->i_lock);
> -			dreq->flags = 0;
> -			if (desc.pg_error < 0)
> +			if (dreq->error < 0) {
> +				desc.pg_error = dreq->error;
> +			} else if (desc.pg_error != -EAGAIN) {
> +				dreq->flags = 0;
> +				if (!desc.pg_error)
> +					desc.pg_error = -EIO;
>  				dreq->error = desc.pg_error;
> -			else
> -				dreq->error = -EIO;
> +			} else
> +				dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
>  			spin_unlock(&cinfo.inode->i_lock);
> +			break;
>  		}
>  		nfs_release_request(req);
>  	}
>  	nfs_pageio_complete(&desc);
>
> -	while (!list_empty(&failed)) {
> -		req = nfs_list_entry(failed.next);
> +	while (!list_empty(&reqs)) {
> +		req = nfs_list_entry(reqs.next);
>  		nfs_list_remove_request(req);
>  		nfs_unlock_and_release_request(req);
> +		if (desc.pg_error == -EAGAIN)
> +			nfs_mark_request_commit(req, NULL, &cinfo, 0);
> +		else
> +			nfs_release_request(req);
>  	}
>
>  	if (put_dreq(dreq))
> @@ -794,9 +802,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
>  {
>  	struct nfs_pageio_descriptor desc;
>  	struct inode *inode = dreq->inode;
> +	struct nfs_commit_info cinfo;
>  	ssize_t result = 0;
>  	size_t requested_bytes = 0;
>  	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
> +	bool defer = false;
>
>  	trace_nfs_direct_write_schedule_iovec(dreq);
>
> @@ -837,17 +847,37 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
>  				break;
>  			}
>
> -			nfs_lock_request(req);
> -			if (!nfs_pageio_add_request(&desc, req)) {
> -				result = desc.pg_error;
> -				nfs_unlock_and_release_request(req);
> -				break;
> -			}
>  			pgbase = 0;
>  			bytes -= req_len;
>  			requested_bytes += req_len;
>  			pos += req_len;
>  			dreq->bytes_left -= req_len;
> +
> +			if (defer) {
> +				nfs_mark_request_commit(req, NULL, &cinfo, 0);
> +				continue;
> +			}
> +
> +			nfs_lock_request(req);
> +			if (nfs_pageio_add_request(&desc, req))
> +				continue;

Just a note, we've hit some trouble with this one on pNFS SCSI.

When we re-order the update of dreq->bytes_left and
nfs_pageio_add_request(), the blocklayout driver machinery ends up with bad
calculations for LAYOUTGET on the first req.   What I see is a short
LAYOUTGET, and then a 2nd LAYOUGET for the last req with loga_length 0,
causing some havoc.

Eventually, there's a corruption somewhere - I think because
nfs_pageio_add_request() ends up doing nfs_pageio_do() in the middle of this
thing and then we race to something in completion - I haven't quite been
able to figure that part out yet.

Ben


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling
  2023-11-09 15:05   ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling Benjamin Coddington
@ 2023-11-09 16:53     ` Trond Myklebust
  2023-11-09 17:25       ` Benjamin Coddington
  0 siblings, 1 reply; 12+ messages in thread
From: Trond Myklebust @ 2023-11-09 16:53 UTC (permalink / raw)
  To: bcodding; +Cc: linux-nfs, anna.schumaker

On Thu, 2023-11-09 at 10:05 -0500, Benjamin Coddington wrote:
> Hi Trond,
> 
> On 4 Sep 2023, at 12:34, trondmy@kernel.org wrote:
> 
> > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > 
> > If we fail to schedule a request for transmission, there are 2
> > possibilities:
> > 1) Either we hit a fatal error, and we just want to drop the
> > remaining
> >    requests on the floor.
> > 2) We were asked to try again, in which case we should allow the
> >    outstanding RPC calls to complete, so that we can recoalesce
> > requests
> >    and try again.
> > 
> > Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to
> > application")
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  fs/nfs/direct.c | 62 ++++++++++++++++++++++++++++++++++++---------
> > ----
> >  1 file changed, 46 insertions(+), 16 deletions(-)
> > 
> > diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> > index 47d892a1d363..ee88f0a6e7b8 100644
> > --- a/fs/nfs/direct.c
> > +++ b/fs/nfs/direct.c
> > @@ -528,10 +528,9 @@ nfs_direct_write_scan_commit_list(struct inode
> > *inode,
> >  static void nfs_direct_write_reschedule(struct nfs_direct_req
> > *dreq)
> >  {
> >  	struct nfs_pageio_descriptor desc;
> > -	struct nfs_page *req, *tmp;
> > +	struct nfs_page *req;
> >  	LIST_HEAD(reqs);
> >  	struct nfs_commit_info cinfo;
> > -	LIST_HEAD(failed);
> > 
> >  	nfs_init_cinfo_from_dreq(&cinfo, dreq);
> >  	nfs_direct_write_scan_commit_list(dreq->inode, &reqs,
> > &cinfo);
> > @@ -549,27 +548,36 @@ static void
> > nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
> >  			      &nfs_direct_write_completion_ops);
> >  	desc.pg_dreq = dreq;
> > 
> > -	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
> > +	while (!list_empty(&reqs)) {
> > +		req = nfs_list_entry(reqs.next);
> >  		/* Bump the transmission count */
> >  		req->wb_nio++;
> >  		if (!nfs_pageio_add_request(&desc, req)) {
> > -			nfs_list_move_request(req, &failed);
> >  			spin_lock(&cinfo.inode->i_lock);
> > -			dreq->flags = 0;
> > -			if (desc.pg_error < 0)
> > +			if (dreq->error < 0) {
> > +				desc.pg_error = dreq->error;
> > +			} else if (desc.pg_error != -EAGAIN) {
> > +				dreq->flags = 0;
> > +				if (!desc.pg_error)
> > +					desc.pg_error = -EIO;
> >  				dreq->error = desc.pg_error;
> > -			else
> > -				dreq->error = -EIO;
> > +			} else
> > +				dreq->flags =
> > NFS_ODIRECT_RESCHED_WRITES;
> >  			spin_unlock(&cinfo.inode->i_lock);
> > +			break;
> >  		}
> >  		nfs_release_request(req);
> >  	}
> >  	nfs_pageio_complete(&desc);
> > 
> > -	while (!list_empty(&failed)) {
> > -		req = nfs_list_entry(failed.next);
> > +	while (!list_empty(&reqs)) {
> > +		req = nfs_list_entry(reqs.next);
> >  		nfs_list_remove_request(req);
> >  		nfs_unlock_and_release_request(req);
> > +		if (desc.pg_error == -EAGAIN)
> > +			nfs_mark_request_commit(req, NULL, &cinfo,
> > 0);
> > +		else
> > +			nfs_release_request(req);
> >  	}
> > 
> >  	if (put_dreq(dreq))
> > @@ -794,9 +802,11 @@ static ssize_t
> > nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
> >  {
> >  	struct nfs_pageio_descriptor desc;
> >  	struct inode *inode = dreq->inode;
> > +	struct nfs_commit_info cinfo;
> >  	ssize_t result = 0;
> >  	size_t requested_bytes = 0;
> >  	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize,
> > PAGE_SIZE);
> > +	bool defer = false;
> > 
> >  	trace_nfs_direct_write_schedule_iovec(dreq);
> > 
> > @@ -837,17 +847,37 @@ static ssize_t
> > nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
> >  				break;
> >  			}
> > 
> > -			nfs_lock_request(req);
> > -			if (!nfs_pageio_add_request(&desc, req)) {
> > -				result = desc.pg_error;
> > -
> > 				nfs_unlock_and_release_request(req);
> > -				break;
> > -			}
> >  			pgbase = 0;
> >  			bytes -= req_len;
> >  			requested_bytes += req_len;
> >  			pos += req_len;
> >  			dreq->bytes_left -= req_len;
> > +
> > +			if (defer) {
> > +				nfs_mark_request_commit(req, NULL,
> > &cinfo, 0);
> > +				continue;
> > +			}
> > +
> > +			nfs_lock_request(req);
> > +			if (nfs_pageio_add_request(&desc, req))
> > +				continue;
> 
> Just a note, we've hit some trouble with this one on pNFS SCSI.
> 
> When we re-order the update of dreq->bytes_left and
> nfs_pageio_add_request(), the blocklayout driver machinery ends up
> with bad
> calculations for LAYOUTGET on the first req.   What I see is a short
> LAYOUTGET, and then a 2nd LAYOUGET for the last req with loga_length
> 0,
> causing some havoc.
> 
> Eventually, there's a corruption somewhere - I think because
> nfs_pageio_add_request() ends up doing nfs_pageio_do() in the middle
> of this
> thing and then we race to something in completion - I haven't quite
> been
> able to figure that part out yet.
> 

Hi Ben,

Relying on the value of dreq->bytes_left is just not a good idea, given
that the layoutget request could end up returning NFS4ERR_DELAY.

How about something like the following patch?

8<-----------------------------------------------------
From e68e9c928c9d843c482ec08e1d26350b7999cafa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 9 Nov 2023 11:46:28 -0500
Subject: [PATCH] pNFS: Fix the pnfs block driver's calculation of layoutget
 size

Instead of relying on the value of the 'bytes_left' field, we should
calculate the layout size based on the offset of the request that is
being written out.

Reported-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/blocklayout/blocklayout.c | 5 ++---
 fs/nfs/direct.c                  | 5 +++--
 fs/nfs/internal.h                | 2 +-
 fs/nfs/pnfs.c                    | 3 ++-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 943aeea1eb16..c1cc9fe93dd4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -893,10 +893,9 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 	}
 
 	if (pgio->pg_dreq == NULL)
-		wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
-					      req->wb_index);
+		wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index);
 	else
-		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req));
 
 	pnfs_generic_pg_init_write(pgio, req, wb_size);
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f6c74f424691..5918c67dae0d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -205,9 +205,10 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
 	kref_put(&dreq->kref, nfs_direct_req_free);
 }
 
-ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
+ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset)
 {
-	return dreq->bytes_left;
+	loff_t start = offset - dreq->io_start;
+	return dreq->max_count - start;
 }
 EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9c9cf764f600..b1fa81c9dff6 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -655,7 +655,7 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
 /* direct.c */
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 			      struct nfs_direct_req *dreq);
-extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
+extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset);
 
 /* nfs4proc.c */
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 21a365357629..0c0fed1ecd0b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2733,7 +2733,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
 		if (pgio->pg_dreq == NULL)
 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
 		else
-			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq,
+						      req_offset(req));
 
 		pgio->pg_lseg =
 			pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
-- 
2.41.0


-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling
  2023-11-09 16:53     ` Trond Myklebust
@ 2023-11-09 17:25       ` Benjamin Coddington
  2023-11-15 13:04         ` Benjamin Coddington
  0 siblings, 1 reply; 12+ messages in thread
From: Benjamin Coddington @ 2023-11-09 17:25 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, anna.schumaker

On 9 Nov 2023, at 11:53, Trond Myklebust wrote:
>
> Hi Ben,
>
> Relying on the value of dreq->bytes_left is just not a good idea, given
> that the layoutget request could end up returning NFS4ERR_DELAY.
>
> How about something like the following patch?

That looks promising!  I think ->bytes_left could get dropped after this.

I'll send it through some testing and report back, thanks!

Ben


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling
  2023-11-09 17:25       ` Benjamin Coddington
@ 2023-11-15 13:04         ` Benjamin Coddington
  2023-11-15 17:16           ` Trond Myklebust
  0 siblings, 1 reply; 12+ messages in thread
From: Benjamin Coddington @ 2023-11-15 13:04 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, anna.schumaker

On 9 Nov 2023, at 12:25, Benjamin Coddington wrote:

> On 9 Nov 2023, at 11:53, Trond Myklebust wrote:
>>
>> Hi Ben,
>>
>> Relying on the value of dreq->bytes_left is just not a good idea, given
>> that the layoutget request could end up returning NFS4ERR_DELAY.
>>
>> How about something like the following patch?
>
> That looks promising!  I think ->bytes_left could get dropped after this.
>
> I'll send it through some testing and report back, thanks!

This definitely fixes it, sorry for the delay getting back.

Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling")
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Tested-by: Benjamin Coddington <bcodding@redhat.com>

It creates some clear additional work to remove nfs_direct_req->bytes_left
(I don't think its needed anymore) and fixup the nfs_direct_req_class
tracepoint, which could be a follow-up patch or get folded in.

Ben


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling
  2023-11-15 13:04         ` Benjamin Coddington
@ 2023-11-15 17:16           ` Trond Myklebust
  2023-11-15 21:30             ` Benjamin Coddington
  0 siblings, 1 reply; 12+ messages in thread
From: Trond Myklebust @ 2023-11-15 17:16 UTC (permalink / raw)
  To: bcodding; +Cc: linux-nfs, anna.schumaker

On Wed, 2023-11-15 at 08:04 -0500, Benjamin Coddington wrote:
> On 9 Nov 2023, at 12:25, Benjamin Coddington wrote:
> 
> > On 9 Nov 2023, at 11:53, Trond Myklebust wrote:
> > > 
> > > Hi Ben,
> > > 
> > > Relying on the value of dreq->bytes_left is just not a good idea,
> > > given
> > > that the layoutget request could end up returning NFS4ERR_DELAY.
> > > 
> > > How about something like the following patch?
> > 
> > That looks promising!  I think ->bytes_left could get dropped after
> > this.
> > 
> > I'll send it through some testing and report back, thanks!
> 
> This definitely fixes it, sorry for the delay getting back.
> 
> Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write
> scheduling")
> Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
> Tested-by: Benjamin Coddington <bcodding@redhat.com>
> 
> It creates some clear additional work to remove nfs_direct_req-
> >bytes_left
> (I don't think its needed anymore) and fixup the nfs_direct_req_class
> tracepoint, which could be a follow-up patch or get folded in.
> 

Thank you! I'll queue that patch up so it gets included in the next
bugfix pull request.

I agree that we should get rid of the bytes_left field. We can queue
something up for that in the next merge window.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling
  2023-11-15 17:16           ` Trond Myklebust
@ 2023-11-15 21:30             ` Benjamin Coddington
  0 siblings, 0 replies; 12+ messages in thread
From: Benjamin Coddington @ 2023-11-15 21:30 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, anna.schumaker

On 15 Nov 2023, at 12:16, Trond Myklebust wrote:

> On Wed, 2023-11-15 at 08:04 -0500, Benjamin Coddington wrote:
>> On 9 Nov 2023, at 12:25, Benjamin Coddington wrote:
>>
>>> On 9 Nov 2023, at 11:53, Trond Myklebust wrote:
>>>>
>>>> Hi Ben,
>>>>
>>>> Relying on the value of dreq->bytes_left is just not a good idea,
>>>> given
>>>> that the layoutget request could end up returning NFS4ERR_DELAY.
>>>>
>>>> How about something like the following patch?
>>>
>>> That looks promising!  I think ->bytes_left could get dropped after
>>> this.
>>>
>>> I'll send it through some testing and report back, thanks!
>>
>> This definitely fixes it, sorry for the delay getting back.
>>
>> Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write
>> scheduling")
>> Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
>> Tested-by: Benjamin Coddington <bcodding@redhat.com>
>>
>> It creates some clear additional work to remove nfs_direct_req-
>>> bytes_left
>> (I don't think its needed anymore) and fixup the nfs_direct_req_class
>> tracepoint, which could be a follow-up patch or get folded in.
>>
>
> Thank you! I'll queue that patch up so it gets included in the next
> bugfix pull request.

Thank you for the fix.

> I agree that we should get rid of the bytes_left field. We can queue
> something up for that in the next merge window.

I have it tested already with yours, I'll send it along.

Ben


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-11-15 21:30 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-04 16:34 [PATCH v2 0/5] Fix O_DIRECT writeback error paths trondmy
2023-09-04 16:34 ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling trondmy
2023-09-04 16:34   ` [PATCH v2 2/5] NFS: Fix O_DIRECT locking issues trondmy
2023-09-04 16:34     ` [PATCH v2 3/5] NFS: More O_DIRECT accounting fixes for error paths trondmy
2023-09-04 16:34       ` [PATCH v2 4/5] NFS: Use the correct commit info in nfs_join_page_group() trondmy
2023-09-04 16:34         ` [PATCH v2 5/5] NFS: More fixes for nfs_direct_write_reschedule_io() trondmy
2023-11-09 15:05   ` [PATCH v2 1/5] NFS: Fix error handling for O_DIRECT write scheduling Benjamin Coddington
2023-11-09 16:53     ` Trond Myklebust
2023-11-09 17:25       ` Benjamin Coddington
2023-11-15 13:04         ` Benjamin Coddington
2023-11-15 17:16           ` Trond Myklebust
2023-11-15 21:30             ` Benjamin Coddington

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).