* [RFC PATCH v2] ceph: do not execute direct write in parallel if O_APPEND is specified
@ 2020-02-04 2:28 xiubli
2020-02-04 14:35 ` Jeff Layton
0 siblings, 1 reply; 3+ messages in thread
From: xiubli @ 2020-02-04 2:28 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: sage, pdonnell, hch, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
In O_APPEND & O_DIRECT mode, the data from different writers may
overlap each other when only the shared lock is held.
For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
mode:
Writer1 Writer2
shared_lock() shared_lock()
getattr(CAP_SIZE) getattr(CAP_SIZE)
iocb->ki_pos = EOF iocb->ki_pos = EOF
write(data1)
write(data2)
shared_unlock() shared_unlock()
The data2 will overlap the data1 from the same file offset, the
old EOF.
Switch to exclusive lock instead when O_APPEND is specified.
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
Changed in V2:
- fix the commit comment
- add more detail in the commit comment
- s/direct_lock/shared_lock/g
fs/ceph/file.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ac7fe8b8081c..e3e67ef215dd 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1475,6 +1475,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
+ bool shared_lock = false;
loff_t pos;
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
@@ -1485,8 +1486,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!prealloc_cf)
return -ENOMEM;
+ if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
+ shared_lock = true;
+
retry_snap:
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (shared_lock)
ceph_start_io_direct(inode);
else
ceph_start_io_write(inode);
@@ -1576,14 +1580,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* we might need to revert back to that point */
data = *from;
- if (iocb->ki_flags & IOCB_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT)
written = ceph_direct_read_write(iocb, &data, snapc,
&prealloc_cf);
- ceph_end_io_direct(inode);
- } else {
+ else
written = ceph_sync_write(iocb, &data, pos, snapc);
+ if (shared_lock)
+ ceph_end_io_direct(inode);
+ else
ceph_end_io_write(inode);
- }
if (written > 0)
iov_iter_advance(from, written);
ceph_put_snap_context(snapc);
@@ -1634,7 +1639,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out_unlocked;
out:
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (shared_lock)
ceph_end_io_direct(inode);
else
ceph_end_io_write(inode);
--
2.21.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [RFC PATCH v2] ceph: do not execute direct write in parallel if O_APPEND is specified
2020-02-04 2:28 [RFC PATCH v2] ceph: do not execute direct write in parallel if O_APPEND is specified xiubli
@ 2020-02-04 14:35 ` Jeff Layton
2020-02-04 14:44 ` Xiubo Li
0 siblings, 1 reply; 3+ messages in thread
From: Jeff Layton @ 2020-02-04 14:35 UTC (permalink / raw)
To: xiubli, idryomov, zyan; +Cc: sage, pdonnell, hch, ceph-devel
On Mon, 2020-02-03 at 21:28 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> In O_APPEND & O_DIRECT mode, the data from different writers will
> be possibly overlapping each other with shared lock.
>
> For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
> mode:
>
> Writer1 Writer2
>
> shared_lock() shared_lock()
> getattr(CAP_SIZE) getattr(CAP_SIZE)
> iocb->ki_pos = EOF iocb->ki_pos = EOF
> write(data1)
> write(data2)
> shared_unlock() shared_unlock()
>
> The data2 will overlap the data1 from the same file offset, the
> old EOF.
>
> Switch to exclusive lock instead when O_APPEND is specified.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> Changed in V2:
> - fix the commit comment
> - add more detail in the commit comment
> - s/direct_lock/shared_lock/g
>
> fs/ceph/file.c | 17 +++++++++++------
> 1 file changed, 11 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index ac7fe8b8081c..e3e67ef215dd 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1475,6 +1475,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
> struct ceph_cap_flush *prealloc_cf;
> ssize_t count, written = 0;
> int err, want, got;
> + bool shared_lock = false;
> loff_t pos;
> loff_t limit = max(i_size_read(inode), fsc->max_file_size);
>
> @@ -1485,8 +1486,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
> if (!prealloc_cf)
> return -ENOMEM;
>
> + if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
> + shared_lock = true;
> +
> retry_snap:
> - if (iocb->ki_flags & IOCB_DIRECT)
> + if (shared_lock)
> ceph_start_io_direct(inode);
> else
> ceph_start_io_write(inode);
> @@ -1576,14 +1580,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>
> /* we might need to revert back to that point */
> data = *from;
> - if (iocb->ki_flags & IOCB_DIRECT) {
> + if (iocb->ki_flags & IOCB_DIRECT)
> written = ceph_direct_read_write(iocb, &data, snapc,
> &prealloc_cf);
> - ceph_end_io_direct(inode);
> - } else {
> + else
> written = ceph_sync_write(iocb, &data, pos, snapc);
> + if (shared_lock)
> + ceph_end_io_direct(inode);
> + else
> ceph_end_io_write(inode);
> - }
> if (written > 0)
> iov_iter_advance(from, written);
> ceph_put_snap_context(snapc);
> @@ -1634,7 +1639,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>
> goto out_unlocked;
> out:
> - if (iocb->ki_flags & IOCB_DIRECT)
> + if (shared_lock)
> ceph_end_io_direct(inode);
> else
> ceph_end_io_write(inode);
Ok, I think this looks reasonable, but I actually preferred the
"direct_lock" name you had before. I'm going to do some testing today
and will probably merge this (with s/shared_lock/direct_lock/) if it
tests out ok.
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC PATCH v2] ceph: do not execute direct write in parallel if O_APPEND is specified
2020-02-04 14:35 ` Jeff Layton
@ 2020-02-04 14:44 ` Xiubo Li
0 siblings, 0 replies; 3+ messages in thread
From: Xiubo Li @ 2020-02-04 14:44 UTC (permalink / raw)
To: Jeff Layton, idryomov, zyan; +Cc: sage, pdonnell, hch, ceph-devel
On 2020/2/4 22:35, Jeff Layton wrote:
> On Mon, 2020-02-03 at 21:28 -0500, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> In O_APPEND & O_DIRECT mode, the data from different writers will
>> be possibly overlapping each other with shared lock.
>>
>> For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
>> mode:
>>
>> Writer1 Writer2
>>
>> shared_lock() shared_lock()
>> getattr(CAP_SIZE) getattr(CAP_SIZE)
>> iocb->ki_pos = EOF iocb->ki_pos = EOF
>> write(data1)
>> write(data2)
>> shared_unlock() shared_unlock()
>>
>> The data2 will overlap the data1 from the same file offset, the
>> old EOF.
>>
>> Switch to exclusive lock instead when O_APPEND is specified.
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> Changed in V2:
>> - fix the commit comment
>> - add more detail in the commit comment
>> - s/direct_lock/shared_lock/g
>>
>> fs/ceph/file.c | 17 +++++++++++------
>> 1 file changed, 11 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index ac7fe8b8081c..e3e67ef215dd 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -1475,6 +1475,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>> struct ceph_cap_flush *prealloc_cf;
>> ssize_t count, written = 0;
>> int err, want, got;
>> + bool shared_lock = false;
>> loff_t pos;
>> loff_t limit = max(i_size_read(inode), fsc->max_file_size);
>>
>> @@ -1485,8 +1486,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>> if (!prealloc_cf)
>> return -ENOMEM;
>>
>> + if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
>> + shared_lock = true;
>> +
>> retry_snap:
>> - if (iocb->ki_flags & IOCB_DIRECT)
>> + if (shared_lock)
>> ceph_start_io_direct(inode);
>> else
>> ceph_start_io_write(inode);
>> @@ -1576,14 +1580,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>>
>> /* we might need to revert back to that point */
>> data = *from;
>> - if (iocb->ki_flags & IOCB_DIRECT) {
>> + if (iocb->ki_flags & IOCB_DIRECT)
>> written = ceph_direct_read_write(iocb, &data, snapc,
>> &prealloc_cf);
>> - ceph_end_io_direct(inode);
>> - } else {
>> + else
>> written = ceph_sync_write(iocb, &data, pos, snapc);
>> + if (shared_lock)
>> + ceph_end_io_direct(inode);
>> + else
>> ceph_end_io_write(inode);
>> - }
>> if (written > 0)
>> iov_iter_advance(from, written);
>> ceph_put_snap_context(snapc);
>> @@ -1634,7 +1639,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>>
>> goto out_unlocked;
>> out:
>> - if (iocb->ki_flags & IOCB_DIRECT)
>> + if (shared_lock)
>> ceph_end_io_direct(inode);
>> else
>> ceph_end_io_write(inode);
> Ok, I think this looks reasonable, but I actually preferred the
> "direct_lock" name you had before. I'm going to do some testing today
> and will probably merge this (with s/shared_lock/direct_lock/) if it
> tests out ok.
Okay :-) Thanks.
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-02-04 14:44 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-04 2:28 [RFC PATCH v2] ceph: do not execute direct write in parallel if O_APPEND is specified xiubli
2020-02-04 14:35 ` Jeff Layton
2020-02-04 14:44 ` Xiubo Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.