All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ceph: return the real size readed when hit EOF
@ 2021-10-19 11:51 xiubli
  2021-10-19 11:54 ` Xiubo Li
  2021-10-19 12:59 ` Jeff Layton
  0 siblings, 2 replies; 5+ messages in thread
From: xiubli @ 2021-10-19 11:51 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, khiremat, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

At the same time set the ki_pos to the file size.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/file.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 91173d3aa161..1abc3b591740 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -847,6 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 	ssize_t ret;
 	u64 off = iocb->ki_pos;
 	u64 len = iov_iter_count(to);
+	u64 i_size = i_size_read(inode);
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
@@ -870,7 +871,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 		struct page **pages;
 		int num_pages;
 		size_t page_off;
-		u64 i_size;
 		bool more;
 		int idx;
 		size_t left;
@@ -909,7 +909,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 
 		ceph_osdc_put_request(req);
 
-		i_size = i_size_read(inode);
 		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
 		     off, len, ret, i_size, (more ? " MORE" : ""));
 
@@ -954,10 +953,15 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 
 	if (off > iocb->ki_pos) {
 		if (ret >= 0 &&
-		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
+		    iov_iter_count(to) > 0 &&
+		    off >= i_size_read(inode)) {
 			*retry_op = CHECK_EOF;
-		ret = off - iocb->ki_pos;
-		iocb->ki_pos = off;
+			ret = i_size - iocb->ki_pos;
+			iocb->ki_pos = i_size;
+		} else {
+			ret = off - iocb->ki_pos;
+			iocb->ki_pos = off;
+		}
 	}
 
 	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: return the real size readed when hit EOF
  2021-10-19 11:51 [PATCH] ceph: return the real size readed when hit EOF xiubli
@ 2021-10-19 11:54 ` Xiubo Li
  2021-10-19 12:59 ` Jeff Layton
  1 sibling, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2021-10-19 11:54 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, khiremat, pdonnell, ceph-devel

Without this, such as in case for the file encrypt feature, when doing 
the following test:

1), echo "1234567890" > dir/a.txt

2), vim dir/a.txt

It will always show the zeroed contents after string "1234567890", 
something like:

"1234567890@@@@@...."

On 10/19/21 7:51 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> At the same time set the ki_pos to the file size.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   fs/ceph/file.c | 14 +++++++++-----
>   1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 91173d3aa161..1abc3b591740 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -847,6 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>   	ssize_t ret;
>   	u64 off = iocb->ki_pos;
>   	u64 len = iov_iter_count(to);
> +	u64 i_size = i_size_read(inode);
>   
>   	dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
>   	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
> @@ -870,7 +871,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>   		struct page **pages;
>   		int num_pages;
>   		size_t page_off;
> -		u64 i_size;
>   		bool more;
>   		int idx;
>   		size_t left;
> @@ -909,7 +909,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>   
>   		ceph_osdc_put_request(req);
>   
> -		i_size = i_size_read(inode);
>   		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
>   		     off, len, ret, i_size, (more ? " MORE" : ""));
>   
> @@ -954,10 +953,15 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>   
>   	if (off > iocb->ki_pos) {
>   		if (ret >= 0 &&
> -		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
> +		    iov_iter_count(to) > 0 &&
> +		    off >= i_size_read(inode)) {
>   			*retry_op = CHECK_EOF;
> -		ret = off - iocb->ki_pos;
> -		iocb->ki_pos = off;
> +			ret = i_size - iocb->ki_pos;
> +			iocb->ki_pos = i_size;
> +		} else {
> +			ret = off - iocb->ki_pos;
> +			iocb->ki_pos = off;
> +		}
>   	}
>   
>   	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: return the real size readed when hit EOF
  2021-10-19 11:51 [PATCH] ceph: return the real size readed when hit EOF xiubli
  2021-10-19 11:54 ` Xiubo Li
@ 2021-10-19 12:59 ` Jeff Layton
  2021-10-20  2:04   ` Xiubo Li
  1 sibling, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2021-10-19 12:59 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, khiremat, pdonnell, ceph-devel

On Tue, 2021-10-19 at 19:51 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> At the same time set the ki_pos to the file size.
> 

It would be good to put the comments in your follow up email into the
patch description here, so that it explains what you're fixing and why.

> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/file.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 91173d3aa161..1abc3b591740 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -847,6 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>  	ssize_t ret;
>  	u64 off = iocb->ki_pos;
>  	u64 len = iov_iter_count(to);
> +	u64 i_size = i_size_read(inode);
>  

Was there a reason to change where the i_size is fetched, or did you
just not see the point in fetching it on each loop? I wonder if we can
hit any races vs. truncates with this?

Oh well, all of this non-buffered I/O seems somewhat racy anyway. ;)

>  	dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
>  	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
> @@ -870,7 +871,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>  		struct page **pages;
>  		int num_pages;
>  		size_t page_off;
> -		u64 i_size;
>  		bool more;
>  		int idx;
>  		size_t left;
> @@ -909,7 +909,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>  
>  		ceph_osdc_put_request(req);
>  
> -		i_size = i_size_read(inode);
>  		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
>  		     off, len, ret, i_size, (more ? " MORE" : ""));
>  
> @@ -954,10 +953,15 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>  
>  	if (off > iocb->ki_pos) {
>  		if (ret >= 0 &&

Do we need to check ret here? I think that if ret < 0, then "off" must
be smaller than "i_size", no?

> -		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
> +		    iov_iter_count(to) > 0 &&
> +		    off >= i_size_read(inode)) {
>  			*retry_op = CHECK_EOF;
> -		ret = off - iocb->ki_pos;
> -		iocb->ki_pos = off;
> +			ret = i_size - iocb->ki_pos;
> +			iocb->ki_pos = i_size;
> +		} else {
> +			ret = off - iocb->ki_pos;
> +			iocb->ki_pos = off;
> +		}
>  	}
>  
>  	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: return the real size readed when hit EOF
  2021-10-19 12:59 ` Jeff Layton
@ 2021-10-20  2:04   ` Xiubo Li
  2021-10-20  2:15     ` Xiubo Li
  0 siblings, 1 reply; 5+ messages in thread
From: Xiubo Li @ 2021-10-20  2:04 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, khiremat, pdonnell, ceph-devel


On 10/19/21 8:59 PM, Jeff Layton wrote:
> On Tue, 2021-10-19 at 19:51 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> At the same time set the ki_pos to the file size.
>>
> It would be good to put the comments in your follow up email into the
> patch description here, so that it explains what you're fixing and why.
Sure.
>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/file.c | 14 +++++++++-----
>>   1 file changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 91173d3aa161..1abc3b591740 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -847,6 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>>   	ssize_t ret;
>>   	u64 off = iocb->ki_pos;
>>   	u64 len = iov_iter_count(to);
>> +	u64 i_size = i_size_read(inode);
>>   
> Was there a reason to change where the i_size is fetched, or did you
> just not see the point in fetching it on each loop? I wonder if we can
> hit any races vs. truncates with this?

 From my reading from the code, it shouldn't.

While doing the truncate it will down_write(&inode->i_rwsem). And the 
sync read will hold down_read() it. It should be safe.

>
> Oh well, all of this non-buffered I/O seems somewhat racy anyway. ;)
>
>>   	dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
>>   	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
>> @@ -870,7 +871,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>>   		struct page **pages;
>>   		int num_pages;
>>   		size_t page_off;
>> -		u64 i_size;
>>   		bool more;
>>   		int idx;
>>   		size_t left;
>> @@ -909,7 +909,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>>   
>>   		ceph_osdc_put_request(req);
>>   
>> -		i_size = i_size_read(inode);
>>   		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
>>   		     off, len, ret, i_size, (more ? " MORE" : ""));
>>   
>> @@ -954,10 +953,15 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>>   
>>   	if (off > iocb->ki_pos) {
>>   		if (ret >= 0 &&
> Do we need to check ret here?

It seems this check makes no sense even in the following case I 
mentioned. Will check it more and try to remove it.


> I think that if ret < 0, then "off" must
> be smaller than "i_size", no?

 From current code, there has one case that it's no, such as if the file 
size is 10, and the ceph may return 4K contents and then the read length 
will be 4K too, and if it just fails in case:

                         copied = copy_page_to_iter(pages[idx++],
                                                    page_off, len, to);
                         off += copied;
                         left -= copied;
                         if (copied < len) {
                                 ret = -EFAULT;
                                 break;
                         }

And if the "copied = 1K" for some reasons, the "off" will be larger than 
the "i_size" but ret < 0.

BRs

Xiubo



>
>> -		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
>> +		    iov_iter_count(to) > 0 &&
>> +		    off >= i_size_read(inode)) {
>>   			*retry_op = CHECK_EOF;
>> -		ret = off - iocb->ki_pos;
>> -		iocb->ki_pos = off;
>> +			ret = i_size - iocb->ki_pos;
>> +			iocb->ki_pos = i_size;
>> +		} else {
>> +			ret = off - iocb->ki_pos;
>> +			iocb->ki_pos = off;
>> +		}
>>   	}
>>   
>>   	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: return the real size readed when hit EOF
  2021-10-20  2:04   ` Xiubo Li
@ 2021-10-20  2:15     ` Xiubo Li
  0 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2021-10-20  2:15 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, khiremat, pdonnell, ceph-devel


On 10/20/21 10:04 AM, Xiubo Li wrote:
>
> On 10/19/21 8:59 PM, Jeff Layton wrote:
>> On Tue, 2021-10-19 at 19:51 +0800, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> At the same time set the ki_pos to the file size.
>>>
>> It would be good to put the comments in your follow up email into the
>> patch description here, so that it explains what you're fixing and why.
> Sure.
>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>   fs/ceph/file.c | 14 +++++++++-----
>>>   1 file changed, 9 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index 91173d3aa161..1abc3b591740 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -847,6 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb 
>>> *iocb, struct iov_iter *to,
>>>       ssize_t ret;
>>>       u64 off = iocb->ki_pos;
>>>       u64 len = iov_iter_count(to);
>>> +    u64 i_size = i_size_read(inode);
>> Was there a reason to change where the i_size is fetched, or did you
>> just not see the point in fetching it on each loop? I wonder if we can
>> hit any races vs. truncates with this?
>
> From my reading from the code, it shouldn't.
>
> While doing the truncate it will down_write(&inode->i_rwsem). And the 
> sync read will hold down_read() it. It should be safe.
>
And also this should be safe if the truncate is in a different kclient:

When the MDS received the truncate request, it must revoke the 'Fr' caps 
from all the kclients first. So if another kclient is still reading the 
file, the truncate will be pause.


>>
>> Oh well, all of this non-buffered I/O seems somewhat racy anyway. ;)
>>
>>>       dout("sync_read on file %p %llu~%u %s\n", file, off, 
>>> (unsigned)len,
>>>            (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
>>> @@ -870,7 +871,6 @@ static ssize_t ceph_sync_read(struct kiocb 
>>> *iocb, struct iov_iter *to,
>>>           struct page **pages;
>>>           int num_pages;
>>>           size_t page_off;
>>> -        u64 i_size;
>>>           bool more;
>>>           int idx;
>>>           size_t left;
>>> @@ -909,7 +909,6 @@ static ssize_t ceph_sync_read(struct kiocb 
>>> *iocb, struct iov_iter *to,
>>>             ceph_osdc_put_request(req);
>>>   -        i_size = i_size_read(inode);
>>>           dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
>>>                off, len, ret, i_size, (more ? " MORE" : ""));
>>>   @@ -954,10 +953,15 @@ static ssize_t ceph_sync_read(struct kiocb 
>>> *iocb, struct iov_iter *to,
>>>         if (off > iocb->ki_pos) {
>>>           if (ret >= 0 &&
>> Do we need to check ret here?
>
> It seems this check makes no sense even in the following case I 
> mentioned. Will check it more and try to remove it.
>
>
>> I think that if ret < 0, then "off" must
>> be smaller than "i_size", no?
>
> From current code, there has one case that it's no, such as if the 
> file size is 10, and the ceph may return 4K contents and then the read 
> length will be 4K too, and if it just fails in case:
>
>                         copied = copy_page_to_iter(pages[idx++],
>                                                    page_off, len, to);
>                         off += copied;
>                         left -= copied;
>                         if (copied < len) {
>                                 ret = -EFAULT;
>                                 break;
>                         }
>
> And if the "copied = 1K" for some reasons, the "off" will be larger 
> than the "i_size" but ret < 0.
>
> BRs
>
> Xiubo
>
>
>
>>
>>> -            iov_iter_count(to) > 0 && off >= i_size_read(inode))
>>> +            iov_iter_count(to) > 0 &&
>>> +            off >= i_size_read(inode)) {
>>>               *retry_op = CHECK_EOF;
>>> -        ret = off - iocb->ki_pos;
>>> -        iocb->ki_pos = off;
>>> +            ret = i_size - iocb->ki_pos;
>>> +            iocb->ki_pos = i_size;
>>> +        } else {
>>> +            ret = off - iocb->ki_pos;
>>> +            iocb->ki_pos = off;
>>> +        }
>>>       }
>>>         dout("sync_read result %zd retry_op %d\n", ret, *retry_op);


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-10-20  2:15 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-19 11:51 [PATCH] ceph: return the real size readed when hit EOF xiubli
2021-10-19 11:54 ` Xiubo Li
2021-10-19 12:59 ` Jeff Layton
2021-10-20  2:04   ` Xiubo Li
2021-10-20  2:15     ` Xiubo Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.