ceph-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] ceph: request Fw caps before updating the mtime in ceph_write_iter
@ 2021-08-11 17:37 Jeff Layton
  2021-08-12  2:51 ` Xiubo Li
  2021-08-12  9:23 ` Luis Henriques
  0 siblings, 2 replies; 3+ messages in thread
From: Jeff Layton @ 2021-08-11 17:37 UTC (permalink / raw)
  To: ceph-devel; +Cc: idryomov, lhenriques, xiubli, Jozef Kováč

The current code will update the mtime and then try to get caps to
handle the write. If we end up having to request caps from the MDS, then
the mtime in the cap grant will clobber the updated mtime and it'll be
lost.

This is most noticeable when two clients are alternately writing to the
same file. Fw caps are continually being granted and revoked, and the
mtime ends up stuck because the updated mtimes are always being
overwritten with the old one.

Fix this by changing the order of operations in ceph_write_iter. Get the
caps much earlier, and only update the times afterward. Also, make sure
we check the NEARFULL conditions before making any changes to the inode.

URL: https://tracker.ceph.com/issues/46574
Reported-by: Jozef Kováč <kovac@firma.zoznam.sk>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/file.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

v2: fix error handling -- make sure we release i_rwsem on error exit

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d1755ac1d964..da856bd5eaa5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1722,22 +1722,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		goto out;
 	}
 
-	err = file_remove_privs(file);
-	if (err)
-		goto out;
-
-	err = file_update_time(file);
-	if (err)
-		goto out;
-
-	inode_inc_iversion_raw(inode);
-
-	if (ci->i_inline_version != CEPH_INLINE_NONE) {
-		err = ceph_uninline_data(file, NULL);
-		if (err < 0)
-			goto out;
-	}
-
 	down_read(&osdc->lock);
 	map_flags = osdc->osdmap->flags;
 	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
@@ -1748,6 +1732,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		goto out;
 	}
 
+	if (ci->i_inline_version != CEPH_INLINE_NONE) {
+		err = ceph_uninline_data(file, NULL);
+		if (err < 0)
+			goto out;
+	}
+
 	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
 	     inode, ceph_vinop(inode), pos, count, i_size_read(inode));
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -1759,6 +1749,16 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (err < 0)
 		goto out;
 
+	err = file_remove_privs(file);
+	if (err)
+		goto out_caps;
+
+	err = file_update_time(file);
+	if (err)
+		goto out_caps;
+
+	inode_inc_iversion_raw(inode);
+
 	dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
 	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
 
@@ -1822,7 +1822,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
 			ceph_check_caps(ci, 0, NULL);
 	}
-
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
 	     inode, ceph_vinop(inode), pos, (unsigned)count,
 	     ceph_cap_string(got));
@@ -1842,6 +1841,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	goto out_unlocked;
+out_caps:
+	ceph_put_cap_refs(ci, got);
 out:
 	if (direct_lock)
 		ceph_end_io_direct(inode);
-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] ceph: request Fw caps before updating the mtime in ceph_write_iter
  2021-08-11 17:37 [PATCH v2] ceph: request Fw caps before updating the mtime in ceph_write_iter Jeff Layton
@ 2021-08-12  2:51 ` Xiubo Li
  2021-08-12  9:23 ` Luis Henriques
  1 sibling, 0 replies; 3+ messages in thread
From: Xiubo Li @ 2021-08-12  2:51 UTC (permalink / raw)
  To: Jeff Layton, ceph-devel; +Cc: idryomov, lhenriques, Jozef Kováč


On 8/12/21 1:37 AM, Jeff Layton wrote:
> The current code will update the mtime and then try to get caps to
> handle the write. If we end up having to request caps from the MDS, then
> the mtime in the cap grant will clobber the updated mtime and it'll be
> lost.
>
> This is most noticeable when two clients are alternately writing to the
> same file. Fw caps are continually being granted and revoked, and the
> mtime ends up stuck because the updated mtimes are always being
> overwritten with the old one.
>
> Fix this by changing the order of operations in ceph_write_iter. Get the
> caps much earlier, and only update the times afterward. Also, make sure
> we check the NEARFULL conditions before making any changes to the inode.
>
> URL: https://tracker.ceph.com/issues/46574
> Reported-by: Jozef Kováč <kovac@firma.zoznam.sk>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>   fs/ceph/file.c | 35 ++++++++++++++++++-----------------
>   1 file changed, 18 insertions(+), 17 deletions(-)
>
> v2: fix error handling -- make sure we release i_rwsem on error exit
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index d1755ac1d964..da856bd5eaa5 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1722,22 +1722,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   		goto out;
>   	}
>   
> -	err = file_remove_privs(file);
> -	if (err)
> -		goto out;
> -
> -	err = file_update_time(file);
> -	if (err)
> -		goto out;
> -
> -	inode_inc_iversion_raw(inode);
> -
> -	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> -		err = ceph_uninline_data(file, NULL);
> -		if (err < 0)
> -			goto out;
> -	}
> -
>   	down_read(&osdc->lock);
>   	map_flags = osdc->osdmap->flags;
>   	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
> @@ -1748,6 +1732,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   		goto out;
>   	}
>   
> +	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> +		err = ceph_uninline_data(file, NULL);
> +		if (err < 0)
> +			goto out;
> +	}
> +
>   	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
>   	     inode, ceph_vinop(inode), pos, count, i_size_read(inode));
>   	if (fi->fmode & CEPH_FILE_MODE_LAZY)
> @@ -1759,6 +1749,16 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   	if (err < 0)
>   		goto out;
>   
> +	err = file_remove_privs(file);
> +	if (err)
> +		goto out_caps;
> +
> +	err = file_update_time(file);
> +	if (err)
> +		goto out_caps;
> +
> +	inode_inc_iversion_raw(inode);
> +
>   	dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
>   	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
>   
> @@ -1822,7 +1822,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
>   			ceph_check_caps(ci, 0, NULL);
>   	}
> -
>   	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
>   	     inode, ceph_vinop(inode), pos, (unsigned)count,
>   	     ceph_cap_string(got));
> @@ -1842,6 +1841,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   	}
>   
>   	goto out_unlocked;
> +out_caps:
> +	ceph_put_cap_refs(ci, got);
>   out:
>   	if (direct_lock)
>   		ceph_end_io_direct(inode);

The fuse client is already correctly doing this.

LGTM

Reviewed-by: Xiubo Li <xiubli@redhat.com>





^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] ceph: request Fw caps before updating the mtime in ceph_write_iter
  2021-08-11 17:37 [PATCH v2] ceph: request Fw caps before updating the mtime in ceph_write_iter Jeff Layton
  2021-08-12  2:51 ` Xiubo Li
@ 2021-08-12  9:23 ` Luis Henriques
  1 sibling, 0 replies; 3+ messages in thread
From: Luis Henriques @ 2021-08-12  9:23 UTC (permalink / raw)
  To: Jeff Layton; +Cc: ceph-devel, idryomov, xiubli, Jozef Kováč

Jeff Layton <jlayton@kernel.org> writes:

> The current code will update the mtime and then try to get caps to
> handle the write. If we end up having to request caps from the MDS, then
> the mtime in the cap grant will clobber the updated mtime and it'll be
> lost.
>
> This is most noticeable when two clients are alternately writing to the
> same file. Fw caps are continually being granted and revoked, and the
> mtime ends up stuck because the updated mtimes are always being
> overwritten with the old one.
>
> Fix this by changing the order of operations in ceph_write_iter. Get the
> caps much earlier, and only update the times afterward. Also, make sure
> we check the NEARFULL conditions before making any changes to the inode.
>
> URL: https://tracker.ceph.com/issues/46574
> Reported-by: Jozef Kováč <kovac@firma.zoznam.sk>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/ceph/file.c | 35 ++++++++++++++++++-----------------
>  1 file changed, 18 insertions(+), 17 deletions(-)
>
> v2: fix error handling -- make sure we release i_rwsem on error exit
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index d1755ac1d964..da856bd5eaa5 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1722,22 +1722,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  		goto out;
>  	}
>  
> -	err = file_remove_privs(file);
> -	if (err)
> -		goto out;
> -
> -	err = file_update_time(file);
> -	if (err)
> -		goto out;
> -
> -	inode_inc_iversion_raw(inode);
> -
> -	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> -		err = ceph_uninline_data(file, NULL);
> -		if (err < 0)
> -			goto out;
> -	}
> -
>  	down_read(&osdc->lock);
>  	map_flags = osdc->osdmap->flags;
>  	pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
> @@ -1748,6 +1732,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  		goto out;
>  	}
>  
> +	if (ci->i_inline_version != CEPH_INLINE_NONE) {
> +		err = ceph_uninline_data(file, NULL);
> +		if (err < 0)
> +			goto out;
> +	}
> +
>  	dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
>  	     inode, ceph_vinop(inode), pos, count, i_size_read(inode));
>  	if (fi->fmode & CEPH_FILE_MODE_LAZY)
> @@ -1759,6 +1749,16 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  	if (err < 0)
>  		goto out;
>  
> +	err = file_remove_privs(file);
> +	if (err)
> +		goto out_caps;
> +
> +	err = file_update_time(file);
> +	if (err)
> +		goto out_caps;
> +
> +	inode_inc_iversion_raw(inode);
> +
>  	dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
>  	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
>  
> @@ -1822,7 +1822,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
>  			ceph_check_caps(ci, 0, NULL);
>  	}
> -
>  	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
>  	     inode, ceph_vinop(inode), pos, (unsigned)count,
>  	     ceph_cap_string(got));
> @@ -1842,6 +1841,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  	}
>  
>  	goto out_unlocked;
> +out_caps:
> +	ceph_put_cap_refs(ci, got);
>  out:
>  	if (direct_lock)
>  		ceph_end_io_direct(inode);
> -- 
>
> 2.31.1
>

LGTM too!

Reviewed-by: Luis Henriques <lhenriques@suse.de>

Cheers,
-- 
Luis

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-08-12  9:23 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-11 17:37 [PATCH v2] ceph: request Fw caps before updating the mtime in ceph_write_iter Jeff Layton
2021-08-12  2:51 ` Xiubo Li
2021-08-12  9:23 ` Luis Henriques

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).