* [PATCH] ceph: try to queue a writeback if revoking fails
@ 2022-04-28 8:29 Xiubo Li
2022-04-28 11:10 ` Jeff Layton
0 siblings, 1 reply; 5+ messages in thread
From: Xiubo Li @ 2022-04-28 8:29 UTC (permalink / raw)
To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li
If the pagecache writeback has just finished and the i_wrbuffer_ref
reaches zero, it will try to trigger ceph_check_caps(). But just
before ceph_check_caps() runs, the i_wrbuffer_ref could be increased
again by an mmap or cache write, and then the Fwb revoke will fail.
We need to try to queue a writeback in this case instead of
triggering the writeback by BDI's delayed work per 5 seconds.
URL: https://tracker.ceph.com/issues/55377
URL: https://tracker.ceph.com/issues/46904
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/caps.c | 44 +++++++++++++++++++++++++++++++++++---------
fs/ceph/super.h | 7 +++++++
2 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 906c95d2a4ed..0c0c8f5ae3b3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1912,6 +1912,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct rb_node *p;
bool queue_invalidate = false;
bool tried_invalidate = false;
+ bool queue_writeback = false;
if (session)
ceph_get_mds_session(session);
@@ -2064,10 +2065,30 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
}
/* completed revocation? going down and there are no caps? */
- if (revoking && (revoking & cap_used) == 0) {
- dout("completed revocation of %s\n",
- ceph_cap_string(cap->implemented & ~cap->issued));
- goto ack;
+ if (revoking) {
+ if ((revoking & cap_used) == 0) {
+ dout("completed revocation of %s\n",
+ ceph_cap_string(cap->implemented & ~cap->issued));
+ goto ack;
+ }
+
+ /*
+ * If the "i_wrbuffer_ref" was increased by mmap or generic
+ * cache write just before the ceph_check_caps() is called,
+ * the Fb capability revoking will fail this time. Then we
+ * must wait for the BDI's delayed work to flush the dirty
+ * pages and to release the "i_wrbuffer_ref", which will cost
+ * at most 5 seconds. That means the MDS needs to wait at
+ * most 5 seconds to finish the Fb capability's revocation.
+ *
+ * Let's queue a writeback for it.
+ */
+ if ((ci->i_last_caps &
+ (CEPH_CAP_FAKE_WRBUFFER | CEPH_CAP_FILE_BUFFER)) &&
+ ci->i_wrbuffer_ref && S_ISREG(inode->i_mode) &&
+ (revoking & CEPH_CAP_FILE_BUFFER)) {
+ queue_writeback = true;
+ }
}
/* want more caps from mds? */
@@ -2134,9 +2155,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
__cap_delay_requeue(mdsc, ci);
}
+ ci->i_last_caps = 0;
spin_unlock(&ci->i_ceph_lock);
ceph_put_mds_session(session);
+ if (queue_writeback)
+ ceph_queue_writeback(inode);
if (queue_invalidate)
ceph_queue_invalidate(inode);
}
@@ -3084,16 +3108,16 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
--ci->i_pin_ref;
if (had & CEPH_CAP_FILE_RD)
if (--ci->i_rd_ref == 0)
- last++;
+ last |= CEPH_CAP_FILE_RD;
if (had & CEPH_CAP_FILE_CACHE)
if (--ci->i_rdcache_ref == 0)
- last++;
+ last |= CEPH_CAP_FILE_CACHE;
if (had & CEPH_CAP_FILE_EXCL)
if (--ci->i_fx_ref == 0)
- last++;
+ last |= CEPH_CAP_FILE_EXCL;
if (had & CEPH_CAP_FILE_BUFFER) {
if (--ci->i_wb_ref == 0) {
- last++;
+ last |= CEPH_CAP_FILE_BUFFER;
/* put the ref held by ceph_take_cap_refs() */
put++;
check_flushsnaps = true;
@@ -3103,7 +3127,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
}
if (had & CEPH_CAP_FILE_WR) {
if (--ci->i_wr_ref == 0) {
- last++;
+ last |= CEPH_CAP_FILE_WR;
check_flushsnaps = true;
if (ci->i_wrbuffer_ref_head == 0 &&
ci->i_dirty_caps == 0 &&
@@ -3131,6 +3155,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
flushsnaps = 1;
wake = 1;
}
+ ci->i_last_caps |= last;
spin_unlock(&ci->i_ceph_lock);
dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
@@ -3193,6 +3218,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
spin_lock(&ci->i_ceph_lock);
ci->i_wrbuffer_ref -= nr;
if (ci->i_wrbuffer_ref == 0) {
+ ci->i_last_caps |= CEPH_CAP_FAKE_WRBUFFER;
last = true;
put++;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 73db7f6021f3..f275a41649af 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -362,6 +362,13 @@ struct ceph_inode_info {
struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */
+ /*
+ * The capabilities whose references reach 0, and the bit
+ * (CEPH_CAP_BITS) is for i_wrbuffer_ref.
+ */
+#define CEPH_CAP_FAKE_WRBUFFER (1 << CEPH_CAP_BITS)
+ unsigned i_last_caps;
+
/*
* Link to the auth cap's session's s_cap_dirty list. s_cap_dirty
* is protected by the mdsc->cap_dirty_lock, but each individual item
--
2.36.0.rc1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] ceph: try to queue a writeback if revoking fails
2022-04-28 8:29 [PATCH] ceph: try to queue a writeback if revoking fails Xiubo Li
@ 2022-04-28 11:10 ` Jeff Layton
2022-04-28 11:52 ` Xiubo Li
0 siblings, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2022-04-28 11:10 UTC (permalink / raw)
To: Xiubo Li; +Cc: idryomov, vshankar, ceph-devel
On Thu, 2022-04-28 at 16:29 +0800, Xiubo Li wrote:
> If the pagecaches writeback just finished and the i_wrbuffer_ref
> reaches zero it will try to trigger ceph_check_caps(). But if just
> before ceph_check_caps() the i_wrbuffer_ref could be increased
> again by mmap/cache write, then the Fwb revoke will fail.
>
> We need to try to queue a writeback in this case instead of
> triggering the writeback by BDI's delayed work per 5 seconds.
>
> URL: https://tracker.ceph.com/issues/55377
> URL: https://tracker.ceph.com/issues/46904
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/caps.c | 44 +++++++++++++++++++++++++++++++++++---------
> fs/ceph/super.h | 7 +++++++
> 2 files changed, 42 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 906c95d2a4ed..0c0c8f5ae3b3 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1912,6 +1912,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> struct rb_node *p;
> bool queue_invalidate = false;
> bool tried_invalidate = false;
> + bool queue_writeback = false;
>
> if (session)
> ceph_get_mds_session(session);
> @@ -2064,10 +2065,30 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> }
>
> /* completed revocation? going down and there are no caps? */
> - if (revoking && (revoking & cap_used) == 0) {
> - dout("completed revocation of %s\n",
> - ceph_cap_string(cap->implemented & ~cap->issued));
> - goto ack;
> + if (revoking) {
> + if ((revoking & cap_used) == 0) {
> + dout("completed revocation of %s\n",
> + ceph_cap_string(cap->implemented & ~cap->issued));
> + goto ack;
> + }
> +
> + /*
> + * If the "i_wrbuffer_ref" was increased by mmap or generic
> + * cache write just before the ceph_check_caps() is called,
> + * the Fb capability revoking will fail this time. Then we
> + * must wait for the BDI's delayed work to flush the dirty
> + * pages and to release the "i_wrbuffer_ref", which will cost
> + * at most 5 seconds. That means the MDS needs to wait at
> + * most 5 seconds to finished the Fb capability's revocation.
> + *
> + * Let's queue a writeback for it.
> + */
> + if ((ci->i_last_caps &
> + (CEPH_CAP_FAKE_WRBUFFER | CEPH_CAP_FILE_BUFFER)) &&
> + ci->i_wrbuffer_ref && S_ISREG(inode->i_mode) &&
> + (revoking & CEPH_CAP_FILE_BUFFER)) {
> + queue_writeback = true;
> + }
Is i_last_caps really necessary? Its handling seems very complex and
it's not 100% clear to me what it's supposed to represent. I'm also not
crazy about the FAKE_WRBUFFER thing.
It seems to me that we ought to queue writeback anytime Fb is being
revoked and i_wrbuffer_ref is non 0. Maybe something like this instead
would be simpler?
if (S_ISREG(inode->i_mode) && (revoking & CEPH_CAP_FILE_BUFFER) &&
ci->i_wrbuffer_ref)
queue_writeback = true;
> }
>
> /* want more caps from mds? */
> @@ -2134,9 +2155,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> __cap_delay_requeue(mdsc, ci);
> }
>
> + ci->i_last_caps = 0;
> spin_unlock(&ci->i_ceph_lock);
>
> ceph_put_mds_session(session);
> + if (queue_writeback)
> + ceph_queue_writeback(inode);
> if (queue_invalidate)
> ceph_queue_invalidate(inode);
> }
> @@ -3084,16 +3108,16 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
> --ci->i_pin_ref;
> if (had & CEPH_CAP_FILE_RD)
> if (--ci->i_rd_ref == 0)
> - last++;
> + last |= CEPH_CAP_FILE_RD;
> if (had & CEPH_CAP_FILE_CACHE)
> if (--ci->i_rdcache_ref == 0)
> - last++;
> + last |= CEPH_CAP_FILE_CACHE;
> if (had & CEPH_CAP_FILE_EXCL)
> if (--ci->i_fx_ref == 0)
> - last++;
> + last |= CEPH_CAP_FILE_EXCL;
> if (had & CEPH_CAP_FILE_BUFFER) {
> if (--ci->i_wb_ref == 0) {
> - last++;
> + last |= CEPH_CAP_FILE_BUFFER;
> /* put the ref held by ceph_take_cap_refs() */
> put++;
> check_flushsnaps = true;
> @@ -3103,7 +3127,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
> }
> if (had & CEPH_CAP_FILE_WR) {
> if (--ci->i_wr_ref == 0) {
> - last++;
> + last |= CEPH_CAP_FILE_WR;
> check_flushsnaps = true;
> if (ci->i_wrbuffer_ref_head == 0 &&
> ci->i_dirty_caps == 0 &&
> @@ -3131,6 +3155,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
> flushsnaps = 1;
> wake = 1;
> }
> + ci->i_last_caps |= last;
> spin_unlock(&ci->i_ceph_lock);
>
> dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
> @@ -3193,6 +3218,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
> spin_lock(&ci->i_ceph_lock);
> ci->i_wrbuffer_ref -= nr;
> if (ci->i_wrbuffer_ref == 0) {
> + ci->i_last_caps |= CEPH_CAP_FAKE_WRBUFFER;
> last = true;
> put++;
> }
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 73db7f6021f3..f275a41649af 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -362,6 +362,13 @@ struct ceph_inode_info {
> struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
> unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */
>
> + /*
> + * The capabilities whose references reach to 0, and the bit
> + * (CEPH_CAP_BITS) is for i_wrbuffer_ref.
> + */
> +#define CEPH_CAP_FAKE_WRBUFFER (1 << CEPH_CAP_BITS)
> + unsigned i_last_caps;
> +
> /*
> * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty
> * is protected by the mdsc->cap_dirty_lock, but each individual item
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ceph: try to queue a writeback if revoking fails
2022-04-28 11:10 ` Jeff Layton
@ 2022-04-28 11:52 ` Xiubo Li
0 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2022-04-28 11:52 UTC (permalink / raw)
To: Jeff Layton; +Cc: idryomov, vshankar, ceph-devel
On 4/28/22 7:10 PM, Jeff Layton wrote:
> On Thu, 2022-04-28 at 16:29 +0800, Xiubo Li wrote:
>> If the pagecaches writeback just finished and the i_wrbuffer_ref
>> reaches zero it will try to trigger ceph_check_caps(). But if just
>> before ceph_check_caps() the i_wrbuffer_ref could be increased
>> again by mmap/cache write, then the Fwb revoke will fail.
>>
>> We need to try to queue a writeback in this case instead of
>> triggering the writeback by BDI's delayed work per 5 seconds.
>>
>> URL: https://tracker.ceph.com/issues/55377
>> URL: https://tracker.ceph.com/issues/46904
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/caps.c | 44 +++++++++++++++++++++++++++++++++++---------
>> fs/ceph/super.h | 7 +++++++
>> 2 files changed, 42 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 906c95d2a4ed..0c0c8f5ae3b3 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -1912,6 +1912,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
>> struct rb_node *p;
>> bool queue_invalidate = false;
>> bool tried_invalidate = false;
>> + bool queue_writeback = false;
>>
>> if (session)
>> ceph_get_mds_session(session);
>> @@ -2064,10 +2065,30 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
>> }
>>
>> /* completed revocation? going down and there are no caps? */
>> - if (revoking && (revoking & cap_used) == 0) {
>> - dout("completed revocation of %s\n",
>> - ceph_cap_string(cap->implemented & ~cap->issued));
>> - goto ack;
>> + if (revoking) {
>> + if ((revoking & cap_used) == 0) {
>> + dout("completed revocation of %s\n",
>> + ceph_cap_string(cap->implemented & ~cap->issued));
>> + goto ack;
>> + }
>> +
>> + /*
>> + * If the "i_wrbuffer_ref" was increased by mmap or generic
>> + * cache write just before the ceph_check_caps() is called,
>> + * the Fb capability revoking will fail this time. Then we
>> + * must wait for the BDI's delayed work to flush the dirty
>> + * pages and to release the "i_wrbuffer_ref", which will cost
>> + * at most 5 seconds. That means the MDS needs to wait at
>> + * most 5 seconds to finished the Fb capability's revocation.
>> + *
>> + * Let's queue a writeback for it.
>> + */
>> + if ((ci->i_last_caps &
>> + (CEPH_CAP_FAKE_WRBUFFER | CEPH_CAP_FILE_BUFFER)) &&
>> + ci->i_wrbuffer_ref && S_ISREG(inode->i_mode) &&
>> + (revoking & CEPH_CAP_FILE_BUFFER)) {
>> + queue_writeback = true;
>> + }
> Is i_last_caps really necessary? It's handling seems very complex and
> it's not 100% clear to me what it's supposed to represent. I'm also not
> crazy about the FAKE_WRBUFFER thing.
>
> It seems to me that we ought to queue writeback anytime Fb is being
> revoked and i_wrbuffer_ref is non 0. Maybe something like this instead
> would be simpler?
>
> if (S_ISREG(inode->i_mode) && (revoking & CEPH_CAP_FILE_BUFFER) &&
> ci->i_wrbuffer_ref)
> queue_writeback = true;
I had only considered doing this for when ceph_check_caps() is called
after the Fb caps ref reaches 0 in the mmap and generic write cases.
Yeah, I think we should also queue the writeback in other cases.
I will fix it.
-- Xiubo
>
>
>> }
>>
>> /* want more caps from mds? */
>> @@ -2134,9 +2155,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
>> __cap_delay_requeue(mdsc, ci);
>> }
>>
>> + ci->i_last_caps = 0;
>> spin_unlock(&ci->i_ceph_lock);
>>
>> ceph_put_mds_session(session);
>> + if (queue_writeback)
>> + ceph_queue_writeback(inode);
>> if (queue_invalidate)
>> ceph_queue_invalidate(inode);
>> }
>> @@ -3084,16 +3108,16 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>> --ci->i_pin_ref;
>> if (had & CEPH_CAP_FILE_RD)
>> if (--ci->i_rd_ref == 0)
>> - last++;
>> + last |= CEPH_CAP_FILE_RD;
>> if (had & CEPH_CAP_FILE_CACHE)
>> if (--ci->i_rdcache_ref == 0)
>> - last++;
>> + last |= CEPH_CAP_FILE_CACHE;
>> if (had & CEPH_CAP_FILE_EXCL)
>> if (--ci->i_fx_ref == 0)
>> - last++;
>> + last |= CEPH_CAP_FILE_EXCL;
>> if (had & CEPH_CAP_FILE_BUFFER) {
>> if (--ci->i_wb_ref == 0) {
>> - last++;
>> + last |= CEPH_CAP_FILE_BUFFER;
>> /* put the ref held by ceph_take_cap_refs() */
>> put++;
>> check_flushsnaps = true;
>> @@ -3103,7 +3127,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>> }
>> if (had & CEPH_CAP_FILE_WR) {
>> if (--ci->i_wr_ref == 0) {
>> - last++;
>> + last |= CEPH_CAP_FILE_WR;
>> check_flushsnaps = true;
>> if (ci->i_wrbuffer_ref_head == 0 &&
>> ci->i_dirty_caps == 0 &&
>> @@ -3131,6 +3155,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>> flushsnaps = 1;
>> wake = 1;
>> }
>> + ci->i_last_caps |= last;
>> spin_unlock(&ci->i_ceph_lock);
>>
>> dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
>> @@ -3193,6 +3218,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>> spin_lock(&ci->i_ceph_lock);
>> ci->i_wrbuffer_ref -= nr;
>> if (ci->i_wrbuffer_ref == 0) {
>> + ci->i_last_caps |= CEPH_CAP_FAKE_WRBUFFER;
>> last = true;
>> put++;
>> }
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 73db7f6021f3..f275a41649af 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -362,6 +362,13 @@ struct ceph_inode_info {
>> struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
>> unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */
>>
>> + /*
>> + * The capabilities whose references reach to 0, and the bit
>> + * (CEPH_CAP_BITS) is for i_wrbuffer_ref.
>> + */
>> +#define CEPH_CAP_FAKE_WRBUFFER (1 << CEPH_CAP_BITS)
>> + unsigned i_last_caps;
>> +
>> /*
>> * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty
>> * is protected by the mdsc->cap_dirty_lock, but each individual item
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ceph: try to queue a writeback if revoking fails
2022-04-28 12:48 Xiubo Li
@ 2022-04-28 12:49 ` Xiubo Li
0 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2022-04-28 12:49 UTC (permalink / raw)
To: jlayton; +Cc: idryomov, vshankar, ceph-devel
Please ignore this, just forgot to add the v2 tag.
-- Xiubo
On 4/28/22 8:48 PM, Xiubo Li wrote:
> If the pagecaches writeback just finished and the i_wrbuffer_ref
> reaches zero it will try to trigger ceph_check_caps(). But if just
> before ceph_check_caps() the i_wrbuffer_ref could be increased
> again by mmap/cache write, then the Fwb revoke will fail.
>
> We need to try to queue a writeback in this case instead of
> triggering the writeback by BDI's delayed work per 5 seconds.
>
> URL: https://tracker.ceph.com/issues/55377
> URL: https://tracker.ceph.com/issues/46904
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/caps.c | 28 ++++++++++++++++++++++++----
> 1 file changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 906c95d2a4ed..22dae29be64d 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1912,6 +1912,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> struct rb_node *p;
> bool queue_invalidate = false;
> bool tried_invalidate = false;
> + bool queue_writeback = false;
>
> if (session)
> ceph_get_mds_session(session);
> @@ -2064,10 +2065,27 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> }
>
> /* completed revocation? going down and there are no caps? */
> - if (revoking && (revoking & cap_used) == 0) {
> - dout("completed revocation of %s\n",
> - ceph_cap_string(cap->implemented & ~cap->issued));
> - goto ack;
> + if (revoking) {
> + if ((revoking & cap_used) == 0) {
> + dout("completed revocation of %s\n",
> + ceph_cap_string(cap->implemented & ~cap->issued));
> + goto ack;
> + }
> +
> + /*
> + * If the "i_wrbuffer_ref" was increased by mmap or generic
> + * cache write just before the ceph_check_caps() is called,
> + * the Fb capability revoking will fail this time. Then we
> + * must wait for the BDI's delayed work to flush the dirty
> + * pages and to release the "i_wrbuffer_ref", which will cost
> + * at most 5 seconds. That means the MDS needs to wait at
> + * most 5 seconds to finished the Fb capability's revocation.
> + *
> + * Let's queue a writeback for it.
> + */
> + if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
> + (revoking & CEPH_CAP_FILE_BUFFER))
> + queue_writeback = true;
> }
>
> /* want more caps from mds? */
> @@ -2137,6 +2155,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> spin_unlock(&ci->i_ceph_lock);
>
> ceph_put_mds_session(session);
> + if (queue_writeback)
> + ceph_queue_writeback(inode);
> if (queue_invalidate)
> ceph_queue_invalidate(inode);
> }
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] ceph: try to queue a writeback if revoking fails
@ 2022-04-28 12:48 Xiubo Li
2022-04-28 12:49 ` Xiubo Li
0 siblings, 1 reply; 5+ messages in thread
From: Xiubo Li @ 2022-04-28 12:48 UTC (permalink / raw)
To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li
If the pagecache writeback has just finished and the i_wrbuffer_ref
reaches zero, it will try to trigger ceph_check_caps(). But just
before ceph_check_caps() runs, the i_wrbuffer_ref could be increased
again by an mmap or cache write, and then the Fwb revoke will fail.
We need to try to queue a writeback in this case instead of
triggering the writeback by BDI's delayed work per 5 seconds.
URL: https://tracker.ceph.com/issues/55377
URL: https://tracker.ceph.com/issues/46904
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/caps.c | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 906c95d2a4ed..22dae29be64d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1912,6 +1912,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct rb_node *p;
bool queue_invalidate = false;
bool tried_invalidate = false;
+ bool queue_writeback = false;
if (session)
ceph_get_mds_session(session);
@@ -2064,10 +2065,27 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
}
/* completed revocation? going down and there are no caps? */
- if (revoking && (revoking & cap_used) == 0) {
- dout("completed revocation of %s\n",
- ceph_cap_string(cap->implemented & ~cap->issued));
- goto ack;
+ if (revoking) {
+ if ((revoking & cap_used) == 0) {
+ dout("completed revocation of %s\n",
+ ceph_cap_string(cap->implemented & ~cap->issued));
+ goto ack;
+ }
+
+ /*
+ * If the "i_wrbuffer_ref" was increased by mmap or generic
+ * cache write just before the ceph_check_caps() is called,
+ * the Fb capability revoking will fail this time. Then we
+ * must wait for the BDI's delayed work to flush the dirty
+ * pages and to release the "i_wrbuffer_ref", which will cost
+ * at most 5 seconds. That means the MDS needs to wait at
+ * most 5 seconds to finish the Fb capability's revocation.
+ *
+ * Let's queue a writeback for it.
+ */
+ if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
+ (revoking & CEPH_CAP_FILE_BUFFER))
+ queue_writeback = true;
}
/* want more caps from mds? */
@@ -2137,6 +2155,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
spin_unlock(&ci->i_ceph_lock);
ceph_put_mds_session(session);
+ if (queue_writeback)
+ ceph_queue_writeback(inode);
if (queue_invalidate)
ceph_queue_invalidate(inode);
}
--
2.36.0.rc1
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-04-28 12:50 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-28 8:29 [PATCH] ceph: try to queue a writeback if revoking fails Xiubo Li
2022-04-28 11:10 ` Jeff Layton
2022-04-28 11:52 ` Xiubo Li
2022-04-28 12:48 Xiubo Li
2022-04-28 12:49 ` Xiubo Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.