Linux-NFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2 0/2] Fix O_DIRECT error handling
@ 2019-10-01 20:19 Trond Myklebust
  2019-10-01 20:19 ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Trond Myklebust
  0 siblings, 1 reply; 5+ messages in thread
From: Trond Myklebust @ 2019-10-01 20:19 UTC (permalink / raw)
  To: Su Yanjun, Anna Schumaker; +Cc: linux-nfs

These patches fix issues with how O_DIRECT handles read and write
errors, and also how it handles eof.

v2:
- Change to handle all short reads and writes in the same manner.

Note that we can still end up breaking generic/465 by using the
nconnect=<n> (where n > 1) mount option. The reason is that the
test assumes the writes land in increasing order on the server,
despite having being launched in parallel.

Trond Myklebust (2):
  NFS: Fix O_DIRECT accounting of number of bytes read/written
  NFS: Remove redundant mirror tracking in O_DIRECT

 fs/nfs/direct.c | 111 ++++++++++++++++++------------------------------
 1 file changed, 41 insertions(+), 70 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written
  2019-10-01 20:19 [PATCH v2 0/2] Fix O_DIRECT error handling Trond Myklebust
@ 2019-10-01 20:19 ` Trond Myklebust
  2019-10-01 20:20   ` [PATCH v2 2/2] NFS: Remove redundant mirror tracking in O_DIRECT Trond Myklebust
  2019-10-08  8:11   ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Su Yanjun
  0 siblings, 2 replies; 5+ messages in thread
From: Trond Myklebust @ 2019-10-01 20:19 UTC (permalink / raw)
  To: Su Yanjun, Anna Schumaker; +Cc: linux-nfs

When a series of O_DIRECT reads or writes are truncated, either due to
eof or due to an error, then we should return the number of contiguous
bytes that were received/sent starting at the offset specified by the
application.

Currently, we are failing to correctly check contiguity, and so we're
failing the generic/465 in xfstests when the race between the read
and write RPCs causes the file to get extended while the 2 reads are
outstanding. If the first read RPC call wins the race and returns with
eof set, we should treat the second read RPC as being truncated.

Reported-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
Fixes: 1ccbad9f9f9bd ("nfs: fix DIO good bytes calculation")
Cc: stable@vger.kernel.org # 4.1+
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c | 82 ++++++++++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 35 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 222d7115db71..62cb4a1a87f0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -123,32 +123,53 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 }
 
 static void
-nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
+nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
+			    const struct nfs_pgio_header *hdr,
+			    ssize_t dreq_len)
 {
-	int i;
-	ssize_t count;
+	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx];
+	loff_t hdr_arg = hdr->io_start + hdr->args.count;
+	ssize_t expected = 0;
+
+	if (hdr_arg > dreq->io_start)
+		expected = hdr_arg - dreq->io_start;
+
+	if (dreq_len == expected)
+		return;
+	if (dreq->max_count > dreq_len) {
+		dreq->max_count = dreq_len;
+		if (dreq->count > dreq_len)
+			dreq->count = dreq_len;
+
+		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
+			dreq->error = hdr->error;
+		else if (hdr->good_bytes > 0)
+			dreq->error = 0;
+	}
+	if (mirror->count > dreq_len)
+		mirror->count = dreq_len;
+}
 
-	WARN_ON_ONCE(dreq->count >= dreq->max_count);
+static void
+nfs_direct_count_bytes(struct nfs_direct_req *dreq,
+		       const struct nfs_pgio_header *hdr)
+{
+	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx];
+	loff_t hdr_end = hdr->io_start + hdr->good_bytes;
+	ssize_t dreq_len = 0;
 
-	if (dreq->mirror_count == 1) {
-		dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
-		dreq->count += hdr->good_bytes;
-	} else {
-		/* mirrored writes */
-		count = dreq->mirrors[hdr->pgio_mirror_idx].count;
-		if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
-			count = hdr->io_start + hdr->good_bytes - dreq->io_start;
-			dreq->mirrors[hdr->pgio_mirror_idx].count = count;
-		}
-		/* update the dreq->count by finding the minimum agreed count from all
-		 * mirrors */
-		count = dreq->mirrors[0].count;
+	if (hdr_end > dreq->io_start)
+		dreq_len = hdr_end - dreq->io_start;
 
-		for (i = 1; i < dreq->mirror_count; i++)
-			count = min(count, dreq->mirrors[i].count);
+	nfs_direct_handle_truncated(dreq, hdr, dreq_len);
 
-		dreq->count = count;
-	}
+	if (dreq_len > dreq->max_count)
+		dreq_len = dreq->max_count;
+
+	if (mirror->count < dreq_len)
+		mirror->count = dreq_len;
+	if (dreq->count < dreq_len)
+		dreq->count = dreq_len;
 }
 
 /*
@@ -402,20 +423,12 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 	struct nfs_direct_req *dreq = hdr->dreq;
 
 	spin_lock(&dreq->lock);
-	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
-		dreq->error = hdr->error;
-
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
 		spin_unlock(&dreq->lock);
 		goto out_put;
 	}
 
-	if (hdr->good_bytes != 0)
-		nfs_direct_good_bytes(dreq, hdr);
-
-	if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
-		dreq->error = 0;
-
+	nfs_direct_count_bytes(dreq, hdr);
 	spin_unlock(&dreq->lock);
 
 	while (!list_empty(&hdr->pages)) {
@@ -652,6 +665,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
 
 	dreq->count = 0;
+	dreq->max_count = 0;
+	list_for_each_entry(req, &reqs, wb_list)
+		dreq->max_count += req->wb_bytes;
 	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
 	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
 	for (i = 0; i < dreq->mirror_count; i++)
@@ -791,17 +807,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 
 	spin_lock(&dreq->lock);
-
-	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
-		dreq->error = hdr->error;
-
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
 		spin_unlock(&dreq->lock);
 		goto out_put;
 	}
 
+	nfs_direct_count_bytes(dreq, hdr);
 	if (hdr->good_bytes != 0) {
-		nfs_direct_good_bytes(dreq, hdr);
 		if (nfs_write_need_commit(hdr)) {
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				request_commit = true;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 2/2] NFS: Remove redundant mirror tracking in O_DIRECT
  2019-10-01 20:19 ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Trond Myklebust
@ 2019-10-01 20:20   ` Trond Myklebust
  2019-10-08  8:11   ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Su Yanjun
  1 sibling, 0 replies; 5+ messages in thread
From: Trond Myklebust @ 2019-10-01 20:20 UTC (permalink / raw)
  To: Su Yanjun, Anna Schumaker; +Cc: linux-nfs

We no longer need the extra mirror length tracking in the O_DIRECT code,
as we are able to track the maximum contiguous length in dreq->max_count.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c | 41 -----------------------------------------
 1 file changed, 41 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 62cb4a1a87f0..c959d7a3044c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -64,13 +64,6 @@
 
 static struct kmem_cache *nfs_direct_cachep;
 
-/*
- * This represents a set of asynchronous requests that we're waiting on
- */
-struct nfs_direct_mirror {
-	ssize_t count;
-};
-
 struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
@@ -84,9 +77,6 @@ struct nfs_direct_req {
 	atomic_t		io_count;	/* i/os we're waiting for */
 	spinlock_t		lock;		/* protect completion state */
 
-	struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
-	int			mirror_count;
-
 	loff_t			io_start;	/* Start offset for I/O */
 	ssize_t			count,		/* bytes actually processed */
 				max_count,	/* max expected count */
@@ -127,7 +117,6 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
 			    const struct nfs_pgio_header *hdr,
 			    ssize_t dreq_len)
 {
-	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx];
 	loff_t hdr_arg = hdr->io_start + hdr->args.count;
 	ssize_t expected = 0;
 
@@ -146,15 +135,12 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
 		else if (hdr->good_bytes > 0)
 			dreq->error = 0;
 	}
-	if (mirror->count > dreq_len)
-		mirror->count = dreq_len;
 }
 
 static void
 nfs_direct_count_bytes(struct nfs_direct_req *dreq,
 		       const struct nfs_pgio_header *hdr)
 {
-	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx];
 	loff_t hdr_end = hdr->io_start + hdr->good_bytes;
 	ssize_t dreq_len = 0;
 
@@ -166,8 +152,6 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq,
 	if (dreq_len > dreq->max_count)
 		dreq_len = dreq->max_count;
 
-	if (mirror->count < dreq_len)
-		mirror->count = dreq_len;
 	if (dreq->count < dreq_len)
 		dreq->count = dreq_len;
 }
@@ -314,18 +298,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 	cinfo->completion_ops = &nfs_direct_commit_completion_ops;
 }
 
-static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq,
-					     struct nfs_pageio_descriptor *pgio,
-					     struct nfs_page *req)
-{
-	int mirror_count = 1;
-
-	if (pgio->pg_ops->pg_get_mirror_count)
-		mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
-
-	dreq->mirror_count = mirror_count;
-}
-
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
 	struct nfs_direct_req *dreq;
@@ -340,7 +312,6 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
 	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
-	dreq->mirror_count = 1;
 	spin_lock_init(&dreq->lock);
 
 	return dreq;
@@ -659,7 +630,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	LIST_HEAD(reqs);
 	struct nfs_commit_info cinfo;
 	LIST_HEAD(failed);
-	int i;
 
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
@@ -670,21 +640,12 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		dreq->max_count += req->wb_bytes;
 	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
 	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
-	for (i = 0; i < dreq->mirror_count; i++)
-		dreq->mirrors[i].count = 0;
 	get_dreq(dreq);
 
 	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 
-	req = nfs_list_entry(reqs.next);
-	nfs_direct_setup_mirroring(dreq, &desc, req);
-	if (desc.pg_error < 0) {
-		list_splice_init(&reqs, &failed);
-		goto out_failed;
-	}
-
 	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
 		/* Bump the transmission count */
 		req->wb_nio++;
@@ -702,7 +663,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	}
 	nfs_pageio_complete(&desc);
 
-out_failed:
 	while (!list_empty(&failed)) {
 		req = nfs_list_entry(failed.next);
 		nfs_list_remove_request(req);
@@ -935,7 +895,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 				break;
 			}
 
-			nfs_direct_setup_mirroring(dreq, &desc, req);
 			if (desc.pg_error < 0) {
 				nfs_free_request(req);
 				result = desc.pg_error;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written
  2019-10-01 20:19 ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Trond Myklebust
  2019-10-01 20:20   ` [PATCH v2 2/2] NFS: Remove redundant mirror tracking in O_DIRECT Trond Myklebust
@ 2019-10-08  8:11   ` Su Yanjun
  2019-10-08 12:42     ` Trond Myklebust
  1 sibling, 1 reply; 5+ messages in thread
From: Su Yanjun @ 2019-10-08  8:11 UTC (permalink / raw)
  To: Trond Myklebust, Anna Schumaker; +Cc: linux-nfs

Hi Trond,

After this patch we have new problem. Sometimes generic/465 test failed.

We capture the packets and find that "read" will read file hole data 
when "write" is going on.

Something like this situation as below,

|xxxxxxxxxxxx|                  |xxxxxxxxxxxxxxxxx|

\-------w-----/\---hole---/\----------w-------/

                              ^

                               R


When "read"/"write" needs more rpc calls,  read rpc call in middle will 
read file hole data.

Because rpc calls in "write" are async, once rpc calls  in "write" are 
out of order then "read" may read hole data.

Maybe we need serialize the r/w in directio mode, what do you think?

Thanks

在 2019/10/2 4:19, Trond Myklebust 写道:
> When a series of O_DIRECT reads or writes are truncated, either due to
> eof or due to an error, then we should return the number of contiguous
> bytes that were received/sent starting at the offset specified by the
> application.
>
> Currently, we are failing to correctly check contiguity, and so we're
> failing the generic/465 in xfstests when the race between the read
> and write RPCs causes the file to get extended while the 2 reads are
> outstanding. If the first read RPC call wins the race and returns with
> eof set, we should treat the second read RPC as being truncated.
>
> Reported-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
> Fixes: 1ccbad9f9f9bd ("nfs: fix DIO good bytes calculation")
> Cc: stable@vger.kernel.org # 4.1+
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>   fs/nfs/direct.c | 82 ++++++++++++++++++++++++++++---------------------
>   1 file changed, 47 insertions(+), 35 deletions(-)
>
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index 222d7115db71..62cb4a1a87f0 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -123,32 +123,53 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
>   }
>   
>   static void
> -nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
> +nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
> +			    const struct nfs_pgio_header *hdr,
> +			    ssize_t dreq_len)
>   {
> -	int i;
> -	ssize_t count;
> +	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx];
> +	loff_t hdr_arg = hdr->io_start + hdr->args.count;
> +	ssize_t expected = 0;
> +
> +	if (hdr_arg > dreq->io_start)
> +		expected = hdr_arg - dreq->io_start;
> +
> +	if (dreq_len == expected)
> +		return;
> +	if (dreq->max_count > dreq_len) {
> +		dreq->max_count = dreq_len;
> +		if (dreq->count > dreq_len)
> +			dreq->count = dreq_len;
> +
> +		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
> +			dreq->error = hdr->error;
> +		else if (hdr->good_bytes > 0)
> +			dreq->error = 0;
> +	}
> +	if (mirror->count > dreq_len)
> +		mirror->count = dreq_len;
> +}
>   
> -	WARN_ON_ONCE(dreq->count >= dreq->max_count);
> +static void
> +nfs_direct_count_bytes(struct nfs_direct_req *dreq,
> +		       const struct nfs_pgio_header *hdr)
> +{
> +	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr->pgio_mirror_idx];
> +	loff_t hdr_end = hdr->io_start + hdr->good_bytes;
> +	ssize_t dreq_len = 0;
>   
> -	if (dreq->mirror_count == 1) {
> -		dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
> -		dreq->count += hdr->good_bytes;
> -	} else {
> -		/* mirrored writes */
> -		count = dreq->mirrors[hdr->pgio_mirror_idx].count;
> -		if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
> -			count = hdr->io_start + hdr->good_bytes - dreq->io_start;
> -			dreq->mirrors[hdr->pgio_mirror_idx].count = count;
> -		}
> -		/* update the dreq->count by finding the minimum agreed count from all
> -		 * mirrors */
> -		count = dreq->mirrors[0].count;
> +	if (hdr_end > dreq->io_start)
> +		dreq_len = hdr_end - dreq->io_start;
>   
> -		for (i = 1; i < dreq->mirror_count; i++)
> -			count = min(count, dreq->mirrors[i].count);
> +	nfs_direct_handle_truncated(dreq, hdr, dreq_len);
>   
> -		dreq->count = count;
> -	}
> +	if (dreq_len > dreq->max_count)
> +		dreq_len = dreq->max_count;
> +
> +	if (mirror->count < dreq_len)
> +		mirror->count = dreq_len;
> +	if (dreq->count < dreq_len)
> +		dreq->count = dreq_len;
>   }
>   
>   /*
> @@ -402,20 +423,12 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
>   	struct nfs_direct_req *dreq = hdr->dreq;
>   
>   	spin_lock(&dreq->lock);
> -	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
> -		dreq->error = hdr->error;
> -
>   	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
>   		spin_unlock(&dreq->lock);
>   		goto out_put;
>   	}
>   
> -	if (hdr->good_bytes != 0)
> -		nfs_direct_good_bytes(dreq, hdr);
> -
> -	if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
> -		dreq->error = 0;
> -
> +	nfs_direct_count_bytes(dreq, hdr);
>   	spin_unlock(&dreq->lock);
>   
>   	while (!list_empty(&hdr->pages)) {
> @@ -652,6 +665,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
>   	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
>   
>   	dreq->count = 0;
> +	dreq->max_count = 0;
> +	list_for_each_entry(req, &reqs, wb_list)
> +		dreq->max_count += req->wb_bytes;
>   	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
>   	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
>   	for (i = 0; i < dreq->mirror_count; i++)
> @@ -791,17 +807,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
>   	nfs_init_cinfo_from_dreq(&cinfo, dreq);
>   
>   	spin_lock(&dreq->lock);
> -
> -	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
> -		dreq->error = hdr->error;
> -
>   	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
>   		spin_unlock(&dreq->lock);
>   		goto out_put;
>   	}
>   
> +	nfs_direct_count_bytes(dreq, hdr);
>   	if (hdr->good_bytes != 0) {
> -		nfs_direct_good_bytes(dreq, hdr);
>   		if (nfs_write_need_commit(hdr)) {
>   			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
>   				request_commit = true;



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written
  2019-10-08  8:11   ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Su Yanjun
@ 2019-10-08 12:42     ` Trond Myklebust
  0 siblings, 0 replies; 5+ messages in thread
From: Trond Myklebust @ 2019-10-08 12:42 UTC (permalink / raw)
  To: Anna.Schumaker, suyj.fnst; +Cc: linux-nfs

On Tue, 2019-10-08 at 16:11 +0800, Su Yanjun wrote:
> Hi Trond,
> 
> After this patch we have new problem. Sometimes generic/465 test
> failed.
> 
> We capture the packets and find that "read" will read file hole data 
> when "write" is going on.
> 
> Something like this situation as below,
> 
> > xxxxxxxxxxxx|                  |xxxxxxxxxxxxxxxxx|
> 
> \-------w-----/\---hole---/\----------w-------/
> 
>                               ^
> 
>                                R
> 
> 
> When "read"/"write" needs more rpc calls,  read rpc call in middle
> will 
> read file hole data.
> 
> Because rpc calls in "write" are async, once rpc calls  in "write"
> are 
> out of order then "read" may read hole data.
> 
> Maybe we need serialize the r/w in directio mode, what do you think?

No. Basic O_DIRECT does not guarantee atomicity of requests, which is
why we do not have generic locking at the VFS level when reading and
writing. The only guarantee being offered is that O_DIRECT and buffered
writes do not collide.

IOW: I think the basic premise for this test is just broken (as I
commented in the patch series I sent) because it is assuming a
behaviour that is simply not guaranteed.


BTW: note that buffered writes have the same property. They are ordered
when being written into the page cache, meaning that reads on the same
client will see no holes, however if you try to read from another
client, then you will see the same behaviour, with temporary holes
magically appearing in the file.

> 
> Thanks
> 
> 在 2019/10/2 4:19, Trond Myklebust 写道:
> > When a series of O_DIRECT reads or writes are truncated, either due
> > to
> > eof or due to an error, then we should return the number of
> > contiguous
> > bytes that were received/sent starting at the offset specified by
> > the
> > application.
> > 
> > Currently, we are failing to correctly check contiguity, and so
> > we're
> > failing the generic/465 in xfstests when the race between the read
> > and write RPCs causes the file to get extended while the 2 reads
> > are
> > outstanding. If the first read RPC call wins the race and returns
> > with
> > eof set, we should treat the second read RPC as being truncated.
> > 
> > Reported-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
> > Fixes: 1ccbad9f9f9bd ("nfs: fix DIO good bytes calculation")
> > Cc: stable@vger.kernel.org # 4.1+
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >   fs/nfs/direct.c | 82 ++++++++++++++++++++++++++++--------------
> > -------
> >   1 file changed, 47 insertions(+), 35 deletions(-)
> > 
> > diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> > index 222d7115db71..62cb4a1a87f0 100644
> > --- a/fs/nfs/direct.c
> > +++ b/fs/nfs/direct.c
> > @@ -123,32 +123,53 @@ static inline int put_dreq(struct
> > nfs_direct_req *dreq)
> >   }
> >   
> >   static void
> > -nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct
> > nfs_pgio_header *hdr)
> > +nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
> > +			    const struct nfs_pgio_header *hdr,
> > +			    ssize_t dreq_len)
> >   {
> > -	int i;
> > -	ssize_t count;
> > +	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr-
> > >pgio_mirror_idx];
> > +	loff_t hdr_arg = hdr->io_start + hdr->args.count;
> > +	ssize_t expected = 0;
> > +
> > +	if (hdr_arg > dreq->io_start)
> > +		expected = hdr_arg - dreq->io_start;
> > +
> > +	if (dreq_len == expected)
> > +		return;
> > +	if (dreq->max_count > dreq_len) {
> > +		dreq->max_count = dreq_len;
> > +		if (dreq->count > dreq_len)
> > +			dreq->count = dreq_len;
> > +
> > +		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
> > +			dreq->error = hdr->error;
> > +		else if (hdr->good_bytes > 0)
> > +			dreq->error = 0;
> > +	}
> > +	if (mirror->count > dreq_len)
> > +		mirror->count = dreq_len;
> > +}
> >   
> > -	WARN_ON_ONCE(dreq->count >= dreq->max_count);
> > +static void
> > +nfs_direct_count_bytes(struct nfs_direct_req *dreq,
> > +		       const struct nfs_pgio_header *hdr)
> > +{
> > +	struct nfs_direct_mirror *mirror = &dreq->mirrors[hdr-
> > >pgio_mirror_idx];
> > +	loff_t hdr_end = hdr->io_start + hdr->good_bytes;
> > +	ssize_t dreq_len = 0;
> >   
> > -	if (dreq->mirror_count == 1) {
> > -		dreq->mirrors[hdr->pgio_mirror_idx].count += hdr-
> > >good_bytes;
> > -		dreq->count += hdr->good_bytes;
> > -	} else {
> > -		/* mirrored writes */
> > -		count = dreq->mirrors[hdr->pgio_mirror_idx].count;
> > -		if (count + dreq->io_start < hdr->io_start + hdr-
> > >good_bytes) {
> > -			count = hdr->io_start + hdr->good_bytes - dreq-
> > >io_start;
> > -			dreq->mirrors[hdr->pgio_mirror_idx].count =
> > count;
> > -		}
> > -		/* update the dreq->count by finding the minimum agreed
> > count from all
> > -		 * mirrors */
> > -		count = dreq->mirrors[0].count;
> > +	if (hdr_end > dreq->io_start)
> > +		dreq_len = hdr_end - dreq->io_start;
> >   
> > -		for (i = 1; i < dreq->mirror_count; i++)
> > -			count = min(count, dreq->mirrors[i].count);
> > +	nfs_direct_handle_truncated(dreq, hdr, dreq_len);
> >   
> > -		dreq->count = count;
> > -	}
> > +	if (dreq_len > dreq->max_count)
> > +		dreq_len = dreq->max_count;
> > +
> > +	if (mirror->count < dreq_len)
> > +		mirror->count = dreq_len;
> > +	if (dreq->count < dreq_len)
> > +		dreq->count = dreq_len;
> >   }
> >   
> >   /*
> > @@ -402,20 +423,12 @@ static void nfs_direct_read_completion(struct
> > nfs_pgio_header *hdr)
> >   	struct nfs_direct_req *dreq = hdr->dreq;
> >   
> >   	spin_lock(&dreq->lock);
> > -	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
> > -		dreq->error = hdr->error;
> > -
> >   	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
> >   		spin_unlock(&dreq->lock);
> >   		goto out_put;
> >   	}
> >   
> > -	if (hdr->good_bytes != 0)
> > -		nfs_direct_good_bytes(dreq, hdr);
> > -
> > -	if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
> > -		dreq->error = 0;
> > -
> > +	nfs_direct_count_bytes(dreq, hdr);
> >   	spin_unlock(&dreq->lock);
> >   
> >   	while (!list_empty(&hdr->pages)) {
> > @@ -652,6 +665,9 @@ static void nfs_direct_write_reschedule(struct
> > nfs_direct_req *dreq)
> >   	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
> >   
> >   	dreq->count = 0;
> > +	dreq->max_count = 0;
> > +	list_for_each_entry(req, &reqs, wb_list)
> > +		dreq->max_count += req->wb_bytes;
> >   	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
> >   	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
> >   	for (i = 0; i < dreq->mirror_count; i++)
> > @@ -791,17 +807,13 @@ static void
> > nfs_direct_write_completion(struct nfs_pgio_header *hdr)
> >   	nfs_init_cinfo_from_dreq(&cinfo, dreq);
> >   
> >   	spin_lock(&dreq->lock);
> > -
> > -	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
> > -		dreq->error = hdr->error;
> > -
> >   	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
> >   		spin_unlock(&dreq->lock);
> >   		goto out_put;
> >   	}
> >   
> > +	nfs_direct_count_bytes(dreq, hdr);
> >   	if (hdr->good_bytes != 0) {
> > -		nfs_direct_good_bytes(dreq, hdr);
> >   		if (nfs_write_need_commit(hdr)) {
> >   			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
> >   				request_commit = true;
> 
> 
-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-01 20:19 [PATCH v2 0/2] Fix O_DIRECT error handling Trond Myklebust
2019-10-01 20:19 ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Trond Myklebust
2019-10-01 20:20   ` [PATCH v2 2/2] NFS: Remove redundant mirror tracking in O_DIRECT Trond Myklebust
2019-10-08  8:11   ` [PATCH v2 1/2] NFS: Fix O_DIRECT accounting of number of bytes read/written Su Yanjun
2019-10-08 12:42     ` Trond Myklebust

Linux-NFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nfs/0 linux-nfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nfs linux-nfs/ https://lore.kernel.org/linux-nfs \
		linux-nfs@vger.kernel.org linux-nfs@archiver.kernel.org
	public-inbox-index linux-nfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-nfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox