* [PATCH] ceph: put the requests/sessions when it fails to alloc memory
@ 2022-01-12 4:29 xiubli
2022-01-13 1:39 ` Xiubo Li
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: xiubli @ 2022-01-12 4:29 UTC (permalink / raw)
To: jlayton; +Cc: idryomov, vshankar, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
When failing to allocate the sessions memory we should make sure
that req1, req2 and the sessions get put. Also, in case max_sessions
has decreased, when reallocating the new memory some sessions may
be missed being put.
And if max_sessions is 0, krealloc will return ZERO_SIZE_PTR,
which will lead to an invalid memory access fault.
URL: https://tracker.ceph.com/issues/53819
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
---
fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
1 file changed, 37 insertions(+), 18 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 944b18b4e217..5c2719f66f62 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
+ unsigned int max_sessions;
int ret, err = 0;
spin_lock(&ci->i_unsafe_lock);
@@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
}
spin_unlock(&ci->i_unsafe_lock);
+ /*
+ * The mdsc->max_sessions is unlikely to be changed
+ * mostly, here we will retry it by reallocating the
+ * sessions array memory to get rid of the mdsc->mutex
+ * lock.
+ */
+retry:
+ max_sessions = mdsc->max_sessions;
+
/*
* Trigger to flush the journal logs in all the relevant MDSes
* manually, or in the worst case we must wait at most 5 seconds
* to wait the journal logs to be flushed by the MDSes periodically.
*/
- if (req1 || req2) {
+ if ((req1 || req2) && likely(max_sessions)) {
struct ceph_mds_session **sessions = NULL;
struct ceph_mds_session *s;
struct ceph_mds_request *req;
- unsigned int max;
int i;
- /*
- * The mdsc->max_sessions is unlikely to be changed
- * mostly, here we will retry it by reallocating the
- * sessions arrary memory to get rid of the mdsc->mutex
- * lock.
- */
-retry:
- max = mdsc->max_sessions;
- sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
- if (!sessions)
- return -ENOMEM;
+ sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
+ if (!sessions) {
+ err = -ENOMEM;
+ goto out;
+ }
spin_lock(&ci->i_unsafe_lock);
if (req1) {
list_for_each_entry(req, &ci->i_unsafe_dirops,
r_unsafe_dir_item) {
s = req->r_session;
- if (unlikely(s->s_mds >= max)) {
+ if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
+ for (i = 0; i < max_sessions; i++) {
+ s = sessions[i];
+ if (s)
+ ceph_put_mds_session(s);
+ }
+ kfree(sessions);
goto retry;
}
if (!sessions[s->s_mds]) {
@@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
list_for_each_entry(req, &ci->i_unsafe_iops,
r_unsafe_target_item) {
s = req->r_session;
- if (unlikely(s->s_mds >= max)) {
+ if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
+ for (i = 0; i < max_sessions; i++) {
+ s = sessions[i];
+ if (s)
+ ceph_put_mds_session(s);
+ }
+ kfree(sessions);
goto retry;
}
if (!sessions[s->s_mds]) {
@@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
spin_unlock(&ci->i_ceph_lock);
/* send flush mdlog request to MDSes */
- for (i = 0; i < max; i++) {
+ for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s) {
send_flush_mdlog(s);
@@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
ceph_timeout_jiffies(req1->r_timeout));
if (ret)
err = -EIO;
- ceph_mdsc_put_request(req1);
}
if (req2) {
ret = !wait_for_completion_timeout(&req2->r_safe_completion,
ceph_timeout_jiffies(req2->r_timeout));
if (ret)
err = -EIO;
- ceph_mdsc_put_request(req2);
}
+
+out:
+ if (req1)
+ ceph_mdsc_put_request(req1);
+ if (req2)
+ ceph_mdsc_put_request(req2);
return err;
}
--
2.27.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] ceph: put the requests/sessions when it fails to alloc memory
2022-01-12 4:29 [PATCH] ceph: put the requests/sessions when it fails to alloc memory xiubli
@ 2022-01-13 1:39 ` Xiubo Li
2022-01-13 7:05 ` Venky Shankar
2022-01-13 11:01 ` Jeff Layton
2 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2022-01-13 1:39 UTC (permalink / raw)
To: jlayton; +Cc: idryomov, vshankar, ceph-devel, mail
cc Niklas.
On 1/12/22 12:29 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> When failing to allocate the sessions memory we should make sure
> the req1 and req2 and the sessions get put. And also in case the
> max_sessions decreased so when kreallocate the new memory some
> sessions maybe missed being put.
>
> And if the max_sessions is 0 krealloc will return ZERO_SIZE_PTR,
> which will lead to a distinct access fault.
>
> URL: https://tracker.ceph.com/issues/53819
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
This is reported by:
Reported-by: Niklas Hambuechen <mail@nh2.me>
I just get the correct mail from Niklas.
Regards
- Xiubo
> ---
> fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 944b18b4e217..5c2719f66f62 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
> struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> + unsigned int max_sessions;
> int ret, err = 0;
>
> spin_lock(&ci->i_unsafe_lock);
> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
> }
> spin_unlock(&ci->i_unsafe_lock);
>
> + /*
> + * The mdsc->max_sessions is unlikely to be changed
> + * mostly, here we will retry it by reallocating the
> + * sessions arrary memory to get rid of the mdsc->mutex
> + * lock.
> + */
> +retry:
> + max_sessions = mdsc->max_sessions;
> +
> /*
> * Trigger to flush the journal logs in all the relevant MDSes
> * manually, or in the worst case we must wait at most 5 seconds
> * to wait the journal logs to be flushed by the MDSes periodically.
> */
> - if (req1 || req2) {
> + if ((req1 || req2) && likely(max_sessions)) {
> struct ceph_mds_session **sessions = NULL;
> struct ceph_mds_session *s;
> struct ceph_mds_request *req;
> - unsigned int max;
> int i;
>
> - /*
> - * The mdsc->max_sessions is unlikely to be changed
> - * mostly, here we will retry it by reallocating the
> - * sessions arrary memory to get rid of the mdsc->mutex
> - * lock.
> - */
> -retry:
> - max = mdsc->max_sessions;
> - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
> - if (!sessions)
> - return -ENOMEM;
> + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
> + if (!sessions) {
> + err = -ENOMEM;
> + goto out;
> + }
>
> spin_lock(&ci->i_unsafe_lock);
> if (req1) {
> list_for_each_entry(req, &ci->i_unsafe_dirops,
> r_unsafe_dir_item) {
> s = req->r_session;
> - if (unlikely(s->s_mds >= max)) {
> + if (unlikely(s->s_mds >= max_sessions)) {
> spin_unlock(&ci->i_unsafe_lock);
> + for (i = 0; i < max_sessions; i++) {
> + s = sessions[i];
> + if (s)
> + ceph_put_mds_session(s);
> + }
> + kfree(sessions);
> goto retry;
> }
> if (!sessions[s->s_mds]) {
> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
> list_for_each_entry(req, &ci->i_unsafe_iops,
> r_unsafe_target_item) {
> s = req->r_session;
> - if (unlikely(s->s_mds >= max)) {
> + if (unlikely(s->s_mds >= max_sessions)) {
> spin_unlock(&ci->i_unsafe_lock);
> + for (i = 0; i < max_sessions; i++) {
> + s = sessions[i];
> + if (s)
> + ceph_put_mds_session(s);
> + }
> + kfree(sessions);
> goto retry;
> }
> if (!sessions[s->s_mds]) {
> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
> spin_unlock(&ci->i_ceph_lock);
>
> /* send flush mdlog request to MDSes */
> - for (i = 0; i < max; i++) {
> + for (i = 0; i < max_sessions; i++) {
> s = sessions[i];
> if (s) {
> send_flush_mdlog(s);
> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
> ceph_timeout_jiffies(req1->r_timeout));
> if (ret)
> err = -EIO;
> - ceph_mdsc_put_request(req1);
> }
> if (req2) {
> ret = !wait_for_completion_timeout(&req2->r_safe_completion,
> ceph_timeout_jiffies(req2->r_timeout));
> if (ret)
> err = -EIO;
> - ceph_mdsc_put_request(req2);
> }
> +
> +out:
> + if (req1)
> + ceph_mdsc_put_request(req1);
> + if (req2)
> + ceph_mdsc_put_request(req2);
> return err;
> }
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ceph: put the requests/sessions when it fails to alloc memory
2022-01-12 4:29 [PATCH] ceph: put the requests/sessions when it fails to alloc memory xiubli
2022-01-13 1:39 ` Xiubo Li
@ 2022-01-13 7:05 ` Venky Shankar
2022-01-13 11:01 ` Jeff Layton
2 siblings, 0 replies; 5+ messages in thread
From: Venky Shankar @ 2022-01-13 7:05 UTC (permalink / raw)
To: Xiubo Li; +Cc: Jeff Layton, Ilya Dryomov, ceph-devel
On Wed, Jan 12, 2022 at 9:59 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> When failing to allocate the sessions memory we should make sure
> the req1 and req2 and the sessions get put. And also in case the
> max_sessions decreased so when kreallocate the new memory some
> sessions maybe missed being put.
>
> And if the max_sessions is 0 krealloc will return ZERO_SIZE_PTR,
> which will lead to a distinct access fault.
>
> URL: https://tracker.ceph.com/issues/53819
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
> ---
> fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 944b18b4e217..5c2719f66f62 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
> struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> + unsigned int max_sessions;
> int ret, err = 0;
>
> spin_lock(&ci->i_unsafe_lock);
> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
> }
> spin_unlock(&ci->i_unsafe_lock);
>
> + /*
> + * The mdsc->max_sessions is unlikely to be changed
> + * mostly, here we will retry it by reallocating the
> + * sessions arrary memory to get rid of the mdsc->mutex
> + * lock.
> + */
> +retry:
> + max_sessions = mdsc->max_sessions;
> +
> /*
> * Trigger to flush the journal logs in all the relevant MDSes
> * manually, or in the worst case we must wait at most 5 seconds
> * to wait the journal logs to be flushed by the MDSes periodically.
> */
> - if (req1 || req2) {
> + if ((req1 || req2) && likely(max_sessions)) {
> struct ceph_mds_session **sessions = NULL;
> struct ceph_mds_session *s;
> struct ceph_mds_request *req;
> - unsigned int max;
> int i;
>
> - /*
> - * The mdsc->max_sessions is unlikely to be changed
> - * mostly, here we will retry it by reallocating the
> - * sessions arrary memory to get rid of the mdsc->mutex
> - * lock.
> - */
"array"
> -retry:
> - max = mdsc->max_sessions;
> - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
> - if (!sessions)
> - return -ENOMEM;
> + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
> + if (!sessions) {
> + err = -ENOMEM;
> + goto out;
> + }
>
> spin_lock(&ci->i_unsafe_lock);
> if (req1) {
> list_for_each_entry(req, &ci->i_unsafe_dirops,
> r_unsafe_dir_item) {
> s = req->r_session;
> - if (unlikely(s->s_mds >= max)) {
> + if (unlikely(s->s_mds >= max_sessions)) {
> spin_unlock(&ci->i_unsafe_lock);
> + for (i = 0; i < max_sessions; i++) {
> + s = sessions[i];
> + if (s)
> + ceph_put_mds_session(s);
> + }
> + kfree(sessions);
nit: this cleanup can be a separate function since it gets repeated below.
> goto retry;
> }
> if (!sessions[s->s_mds]) {
> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
> list_for_each_entry(req, &ci->i_unsafe_iops,
> r_unsafe_target_item) {
> s = req->r_session;
> - if (unlikely(s->s_mds >= max)) {
> + if (unlikely(s->s_mds >= max_sessions)) {
> spin_unlock(&ci->i_unsafe_lock);
> + for (i = 0; i < max_sessions; i++) {
> + s = sessions[i];
> + if (s)
> + ceph_put_mds_session(s);
> + }
> + kfree(sessions);
> goto retry;
> }
> if (!sessions[s->s_mds]) {
> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
> spin_unlock(&ci->i_ceph_lock);
>
> /* send flush mdlog request to MDSes */
> - for (i = 0; i < max; i++) {
> + for (i = 0; i < max_sessions; i++) {
> s = sessions[i];
> if (s) {
> send_flush_mdlog(s);
> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
> ceph_timeout_jiffies(req1->r_timeout));
> if (ret)
> err = -EIO;
> - ceph_mdsc_put_request(req1);
> }
> if (req2) {
> ret = !wait_for_completion_timeout(&req2->r_safe_completion,
> ceph_timeout_jiffies(req2->r_timeout));
> if (ret)
> err = -EIO;
> - ceph_mdsc_put_request(req2);
> }
> +
> +out:
> + if (req1)
> + ceph_mdsc_put_request(req1);
> + if (req2)
> + ceph_mdsc_put_request(req2);
> return err;
> }
Looks good.
Reviewed-by: Venky Shankar <vshankar@redhat.com>
>
> --
> 2.27.0
>
--
Cheers,
Venky
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ceph: put the requests/sessions when it fails to alloc memory
2022-01-12 4:29 [PATCH] ceph: put the requests/sessions when it fails to alloc memory xiubli
2022-01-13 1:39 ` Xiubo Li
2022-01-13 7:05 ` Venky Shankar
@ 2022-01-13 11:01 ` Jeff Layton
2022-01-13 12:43 ` Xiubo Li
2 siblings, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2022-01-13 11:01 UTC (permalink / raw)
To: xiubli; +Cc: idryomov, vshankar, ceph-devel
On Wed, 2022-01-12 at 12:29 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> When failing to allocate the sessions memory we should make sure
> the req1 and req2 and the sessions get put. And also in case the
> max_sessions decreased so when kreallocate the new memory some
> sessions maybe missed being put.
>
> And if the max_sessions is 0 krealloc will return ZERO_SIZE_PTR,
> which will lead to a distinct access fault.
>
> URL: https://tracker.ceph.com/issues/53819
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
> ---
> fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 944b18b4e217..5c2719f66f62 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
> struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> + unsigned int max_sessions;
> int ret, err = 0;
>
> spin_lock(&ci->i_unsafe_lock);
> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
> }
> spin_unlock(&ci->i_unsafe_lock);
>
> + /*
> + * The mdsc->max_sessions is unlikely to be changed
> + * mostly, here we will retry it by reallocating the
> + * sessions arrary memory to get rid of the mdsc->mutex
> + * lock.
> + */
> +retry:
> + max_sessions = mdsc->max_sessions;
> +
> /*
> * Trigger to flush the journal logs in all the relevant MDSes
> * manually, or in the worst case we must wait at most 5 seconds
> * to wait the journal logs to be flushed by the MDSes periodically.
> */
> - if (req1 || req2) {
> + if ((req1 || req2) && likely(max_sessions)) {
> struct ceph_mds_session **sessions = NULL;
> struct ceph_mds_session *s;
> struct ceph_mds_request *req;
> - unsigned int max;
> int i;
>
> - /*
> - * The mdsc->max_sessions is unlikely to be changed
> - * mostly, here we will retry it by reallocating the
> - * sessions arrary memory to get rid of the mdsc->mutex
> - * lock.
> - */
> -retry:
> - max = mdsc->max_sessions;
> - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
> - if (!sessions)
> - return -ENOMEM;
> + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
> + if (!sessions) {
> + err = -ENOMEM;
> + goto out;
> + }
>
> spin_lock(&ci->i_unsafe_lock);
> if (req1) {
> list_for_each_entry(req, &ci->i_unsafe_dirops,
> r_unsafe_dir_item) {
> s = req->r_session;
> - if (unlikely(s->s_mds >= max)) {
> + if (unlikely(s->s_mds >= max_sessions)) {
> spin_unlock(&ci->i_unsafe_lock);
> + for (i = 0; i < max_sessions; i++) {
> + s = sessions[i];
> + if (s)
> + ceph_put_mds_session(s);
> + }
> + kfree(sessions);
> goto retry;
> }
> if (!sessions[s->s_mds]) {
> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
> list_for_each_entry(req, &ci->i_unsafe_iops,
> r_unsafe_target_item) {
> s = req->r_session;
> - if (unlikely(s->s_mds >= max)) {
> + if (unlikely(s->s_mds >= max_sessions)) {
> spin_unlock(&ci->i_unsafe_lock);
> + for (i = 0; i < max_sessions; i++) {
> + s = sessions[i];
> + if (s)
> + ceph_put_mds_session(s);
> + }
> + kfree(sessions);
> goto retry;
> }
> if (!sessions[s->s_mds]) {
> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
> spin_unlock(&ci->i_ceph_lock);
>
> /* send flush mdlog request to MDSes */
> - for (i = 0; i < max; i++) {
> + for (i = 0; i < max_sessions; i++) {
> s = sessions[i];
> if (s) {
> send_flush_mdlog(s);
> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
> ceph_timeout_jiffies(req1->r_timeout));
> if (ret)
> err = -EIO;
> - ceph_mdsc_put_request(req1);
> }
> if (req2) {
> ret = !wait_for_completion_timeout(&req2->r_safe_completion,
> ceph_timeout_jiffies(req2->r_timeout));
> if (ret)
> err = -EIO;
> - ceph_mdsc_put_request(req2);
> }
> +
> +out:
> + if (req1)
> + ceph_mdsc_put_request(req1);
> + if (req2)
> + ceph_mdsc_put_request(req2);
> return err;
> }
>
Looks good. I fixed up the minor spelling error in the comment that
Venky noticed too. Merged into testing branch.
Thanks,
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ceph: put the requests/sessions when it fails to alloc memory
2022-01-13 11:01 ` Jeff Layton
@ 2022-01-13 12:43 ` Xiubo Li
0 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2022-01-13 12:43 UTC (permalink / raw)
To: Jeff Layton; +Cc: idryomov, vshankar, ceph-devel
On 1/13/22 7:01 PM, Jeff Layton wrote:
> On Wed, 2022-01-12 at 12:29 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> When failing to allocate the sessions memory we should make sure
>> the req1 and req2 and the sessions get put. And also in case the
>> max_sessions decreased so when kreallocate the new memory some
>> sessions maybe missed being put.
>>
>> And if the max_sessions is 0 krealloc will return ZERO_SIZE_PTR,
>> which will lead to a distinct access fault.
>>
>> URL: https://tracker.ceph.com/issues/53819
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs")
>> ---
>> fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++-----------------
>> 1 file changed, 37 insertions(+), 18 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 944b18b4e217..5c2719f66f62 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -2276,6 +2276,7 @@ static int unsafe_request_wait(struct inode *inode)
>> struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>> struct ceph_inode_info *ci = ceph_inode(inode);
>> struct ceph_mds_request *req1 = NULL, *req2 = NULL;
>> + unsigned int max_sessions;
>> int ret, err = 0;
>>
>> spin_lock(&ci->i_unsafe_lock);
>> @@ -2293,37 +2294,45 @@ static int unsafe_request_wait(struct inode *inode)
>> }
>> spin_unlock(&ci->i_unsafe_lock);
>>
>> + /*
>> + * The mdsc->max_sessions is unlikely to be changed
>> + * mostly, here we will retry it by reallocating the
>> + * sessions arrary memory to get rid of the mdsc->mutex
>> + * lock.
>> + */
>> +retry:
>> + max_sessions = mdsc->max_sessions;
>> +
>> /*
>> * Trigger to flush the journal logs in all the relevant MDSes
>> * manually, or in the worst case we must wait at most 5 seconds
>> * to wait the journal logs to be flushed by the MDSes periodically.
>> */
>> - if (req1 || req2) {
>> + if ((req1 || req2) && likely(max_sessions)) {
>> struct ceph_mds_session **sessions = NULL;
>> struct ceph_mds_session *s;
>> struct ceph_mds_request *req;
>> - unsigned int max;
>> int i;
>>
>> - /*
>> - * The mdsc->max_sessions is unlikely to be changed
>> - * mostly, here we will retry it by reallocating the
>> - * sessions arrary memory to get rid of the mdsc->mutex
>> - * lock.
>> - */
>> -retry:
>> - max = mdsc->max_sessions;
>> - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
>> - if (!sessions)
>> - return -ENOMEM;
>> + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL);
>> + if (!sessions) {
>> + err = -ENOMEM;
>> + goto out;
>> + }
>>
>> spin_lock(&ci->i_unsafe_lock);
>> if (req1) {
>> list_for_each_entry(req, &ci->i_unsafe_dirops,
>> r_unsafe_dir_item) {
>> s = req->r_session;
>> - if (unlikely(s->s_mds >= max)) {
>> + if (unlikely(s->s_mds >= max_sessions)) {
>> spin_unlock(&ci->i_unsafe_lock);
>> + for (i = 0; i < max_sessions; i++) {
>> + s = sessions[i];
>> + if (s)
>> + ceph_put_mds_session(s);
>> + }
>> + kfree(sessions);
>> goto retry;
>> }
>> if (!sessions[s->s_mds]) {
>> @@ -2336,8 +2345,14 @@ static int unsafe_request_wait(struct inode *inode)
>> list_for_each_entry(req, &ci->i_unsafe_iops,
>> r_unsafe_target_item) {
>> s = req->r_session;
>> - if (unlikely(s->s_mds >= max)) {
>> + if (unlikely(s->s_mds >= max_sessions)) {
>> spin_unlock(&ci->i_unsafe_lock);
>> + for (i = 0; i < max_sessions; i++) {
>> + s = sessions[i];
>> + if (s)
>> + ceph_put_mds_session(s);
>> + }
>> + kfree(sessions);
>> goto retry;
>> }
>> if (!sessions[s->s_mds]) {
>> @@ -2358,7 +2373,7 @@ static int unsafe_request_wait(struct inode *inode)
>> spin_unlock(&ci->i_ceph_lock);
>>
>> /* send flush mdlog request to MDSes */
>> - for (i = 0; i < max; i++) {
>> + for (i = 0; i < max_sessions; i++) {
>> s = sessions[i];
>> if (s) {
>> send_flush_mdlog(s);
>> @@ -2375,15 +2390,19 @@ static int unsafe_request_wait(struct inode *inode)
>> ceph_timeout_jiffies(req1->r_timeout));
>> if (ret)
>> err = -EIO;
>> - ceph_mdsc_put_request(req1);
>> }
>> if (req2) {
>> ret = !wait_for_completion_timeout(&req2->r_safe_completion,
>> ceph_timeout_jiffies(req2->r_timeout));
>> if (ret)
>> err = -EIO;
>> - ceph_mdsc_put_request(req2);
>> }
>> +
>> +out:
>> + if (req1)
>> + ceph_mdsc_put_request(req1);
>> + if (req2)
>> + ceph_mdsc_put_request(req2);
>> return err;
>> }
>>
> Looks good. I fixed up the minor spelling error in the comment that
> Venky noticed too. Merged into testing branch.
Okay, thanks Venky and Jeff.
>
> Thanks,
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-01-13 12:43 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-12 4:29 [PATCH] ceph: put the requests/sessions when it fails to alloc memory xiubli
2022-01-13 1:39 ` Xiubo Li
2022-01-13 7:05 ` Venky Shankar
2022-01-13 11:01 ` Jeff Layton
2022-01-13 12:43 ` Xiubo Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.