All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/2] ceph: fix dead lock and double lock
@ 2020-05-27  1:42 xiubli
  2020-05-27  1:42 ` [PATCH v3 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
  2020-05-27  1:42 ` [PATCH v3 2/2] ceph: do not check the caps when reconnecting to mds xiubli
  0 siblings, 2 replies; 5+ messages in thread
From: xiubli @ 2020-05-27  1:42 UTC (permalink / raw)
  To: jlayton, idryomov, zyan; +Cc: pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Changed in V2:
- do not check the caps when reconnecting to mds
- switch ceph_async_check_caps() to ceph_async_put_cap_refs()

Changed in V3:
- fix putting the cap refs leak

By adding a put_cap_refs queue work we can avoid the 'mdsc->mutex' and
'session->s_mutex' double lock issue and also the deadlock issue with them.
There are at least 10+ places that have the above issues, and most of them
are caused by calling ceph_mdsc_put_request() when releasing the 'req'.

Xiubo Li (2):
  ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
  ceph: do not check the caps when reconnecting to mds

 fs/ceph/caps.c       | 98 +++++++++++++++++++++++++++++++++++++++++++++-------
 fs/ceph/dir.c        |  2 +-
 fs/ceph/file.c       |  2 +-
 fs/ceph/inode.c      |  3 ++
 fs/ceph/mds_client.c | 14 +++++---
 fs/ceph/mds_client.h |  3 +-
 fs/ceph/super.h      |  7 ++++
 7 files changed, 110 insertions(+), 19 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v3 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
  2020-05-27  1:42 [PATCH v3 0/2] ceph: fix dead lock and double lock xiubli
@ 2020-05-27  1:42 ` xiubli
  2020-05-27  2:16   ` Yan, Zheng
  2020-05-27  1:42 ` [PATCH v3 2/2] ceph: do not check the caps when reconnecting to mds xiubli
  1 sibling, 1 reply; 5+ messages in thread
From: xiubli @ 2020-05-27  1:42 UTC (permalink / raw)
  To: jlayton, idryomov, zyan; +Cc: pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

ceph_check_caps() may lock/unlock the session->s_mutex, and
ceph_flush_snaps() may lock/unlock the mdsc->mutex. Both of them may be
called from ceph_mdsc_put_request(), which may itself already be running
under the session or mdsc mutex lock/unlock pair, in which case we will
hit a deadlock or double lock issue.

There are some deadlock cases, like:
handle_forward()
...
mutex_lock(&mdsc->mutex)
...
ceph_mdsc_put_request()
  --> ceph_mdsc_release_request()
    --> ceph_put_cap_request()
      --> ceph_put_cap_refs()
        --> ceph_check_caps()
...
mutex_unlock(&mdsc->mutex)

And there may also be some double session lock cases, like:

send_mds_reconnect()
...
mutex_lock(&session->s_mutex);
...
  --> replay_unsafe_requests()
    --> ceph_mdsc_release_dir_caps()
      --> ceph_put_cap_refs()
        --> ceph_check_caps()
...
mutex_unlock(&session->s_mutex);

This patch just delays calling them in a queue work to avoid
the deadlock and double lock issues.

URL: https://tracker.ceph.com/issues/45635
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 89 ++++++++++++++++++++++++++++++++++++++++++++--------
 fs/ceph/inode.c      |  3 ++
 fs/ceph/mds_client.c |  2 +-
 fs/ceph/super.h      |  5 +++
 4 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 27c2e60..996bedb 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -9,6 +9,7 @@
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/iversion.h>
+#include <linux/bits.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -3007,19 +3008,15 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
 	return 0;
 }
 
-/*
- * Release cap refs.
- *
- * If we released the last ref on any given cap, call ceph_check_caps
- * to release (or schedule a release).
- *
- * If we are releasing a WR cap (from a sync write), finalize any affected
- * cap_snap, and wake up any waiters.
- */
-void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
+#define CAP_REF_LAST_BIT       0
+#define CAP_REF_FLUSHSNAPS_BIT 1
+#define CAP_REF_WAKE_BIT       2
+
+static int __ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 {
 	struct inode *inode = &ci->vfs_inode;
 	int last = 0, put = 0, flushsnaps = 0, wake = 0;
+	unsigned long flags = 0;
 
 	spin_lock(&ci->i_ceph_lock);
 	if (had & CEPH_CAP_PIN)
@@ -3073,13 +3070,79 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 	     last ? " last" : "", put ? " put" : "");
 
 	if (last)
-		ceph_check_caps(ci, 0, NULL);
+		set_bit(CAP_REF_LAST_BIT, &flags);
 	else if (flushsnaps)
-		ceph_flush_snaps(ci, NULL);
+		set_bit(CAP_REF_FLUSHSNAPS_BIT, &flags);
 	if (wake)
-		wake_up_all(&ci->i_cap_wq);
+		set_bit(CAP_REF_WAKE_BIT, &flags);
 	while (put-- > 0)
 		iput(inode);
+
+	return flags;
+}
+
+/*
+ * This is the bottom half of __ceph_put_cap_refs(), which
+ * may take the mdsc->mutex and session->s_mutex and this
+ * will be called in a queue work to avoid dead/double lock
+ * issues if called from ceph_mdsc_put_request().
+ */
+static void __ceph_put_cap_refs_bh(struct ceph_inode_info *ci,
+				   unsigned long flags)
+{
+	if (test_bit(CAP_REF_LAST_BIT, &flags))
+		ceph_check_caps(ci, 0, NULL);
+	else if (test_bit(CAP_REF_FLUSHSNAPS_BIT, &flags))
+		ceph_flush_snaps(ci, NULL);
+	if (test_bit(CAP_REF_WAKE_BIT, &flags))
+		wake_up_all(&ci->i_cap_wq);
+}
+
+/*
+ * Release cap refs.
+ *
+ * If we released the last ref on any given cap, call ceph_check_caps
+ * to release (or schedule a release).
+ *
+ * If we are releasing a WR cap (from a sync write), finalize any affected
+ * cap_snap, and wake up any waiters.
+ */
+void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
+{
+	unsigned long flags;
+
+	flags = __ceph_put_cap_refs(ci, had);
+	__ceph_put_cap_refs_bh(ci, flags);
+}
+
+void ceph_async_put_cap_refs_work(struct work_struct *work)
+{
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  put_cap_refs_work);
+	unsigned long flags;
+
+	spin_lock(&ci->i_ceph_lock);
+	flags = xchg(&ci->pending_put_flags, 0);
+	spin_unlock(&ci->i_ceph_lock);
+	if (!flags)
+		return;
+
+	__ceph_put_cap_refs_bh(ci, flags);
+}
+
+void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
+{
+	struct inode *inode = &ci->vfs_inode;
+	unsigned long flags;
+
+	flags = __ceph_put_cap_refs(ci, had);
+
+	spin_lock(&ci->i_ceph_lock);
+	ci->pending_put_flags |= flags;
+	spin_unlock(&ci->i_ceph_lock);
+
+	queue_work(ceph_inode_to_client(inode)->inode_wq,
+		   &ci->put_cap_refs_work);
 }
 
 /*
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 357c937..e0ea508 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	INIT_LIST_HEAD(&ci->i_snap_realm_item);
 	INIT_LIST_HEAD(&ci->i_snap_flush_item);
 
+	INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
+	ci->pending_put_flags = 0;
+
 	INIT_WORK(&ci->i_work, ceph_inode_work);
 	ci->i_work_mask = 0;
 	memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0e0ab01..12506b7 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3398,7 +3398,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
 	dcaps = xchg(&req->r_dir_caps, 0);
 	if (dcaps) {
 		dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
-		ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
+		ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
 	}
 }
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 226f19c..ece94fc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -421,6 +421,9 @@ struct ceph_inode_info {
 	struct timespec64 i_btime;
 	struct timespec64 i_snap_btime;
 
+	struct work_struct put_cap_refs_work;
+	unsigned long pending_put_flags;
+
 	struct work_struct i_work;
 	unsigned long  i_work_mask;
 
@@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
 				bool snap_rwsem_locked);
 extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_async_put_cap_refs_work(struct work_struct *work);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v3 2/2] ceph: do not check the caps when reconnecting to mds
  2020-05-27  1:42 [PATCH v3 0/2] ceph: fix dead lock and double lock xiubli
  2020-05-27  1:42 ` [PATCH v3 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
@ 2020-05-27  1:42 ` xiubli
  1 sibling, 0 replies; 5+ messages in thread
From: xiubli @ 2020-05-27  1:42 UTC (permalink / raw)
  To: jlayton, idryomov, zyan; +Cc: pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

It makes no sense to check the caps when reconnecting to the mds.

URL: https://tracker.ceph.com/issues/45635
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       |  9 +++++++++
 fs/ceph/dir.c        |  2 +-
 fs/ceph/file.c       |  2 +-
 fs/ceph/mds_client.c | 14 ++++++++++----
 fs/ceph/mds_client.h |  3 ++-
 fs/ceph/super.h      |  2 ++
 6 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 996bedb..1d3301b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3115,6 +3115,15 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 	__ceph_put_cap_refs_bh(ci, flags);
 }
 
+void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
+{
+	unsigned long flags;
+
+	flags = __ceph_put_cap_refs(ci, had);
+	clear_bit(CAP_REF_LAST_BIT, &flags);
+	__ceph_put_cap_refs_bh(ci, flags);
+}
+
 void ceph_async_put_cap_refs_work(struct work_struct *work)
 {
 	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 39f5311..9f66ea5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1079,7 +1079,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
 	}
 out:
 	iput(req->r_old_inode);
-	ceph_mdsc_release_dir_caps(req);
+	ceph_mdsc_release_dir_caps(req, false);
 }
 
 static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 160644d..812da94 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -565,7 +565,7 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
 			req->r_deleg_ino);
 	}
 out:
-	ceph_mdsc_release_dir_caps(req);
+	ceph_mdsc_release_dir_caps(req, false);
 }
 
 static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 12506b7..ec674f2 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -804,7 +804,7 @@ void ceph_mdsc_release_request(struct kref *kref)
 	struct ceph_mds_request *req = container_of(kref,
 						    struct ceph_mds_request,
 						    r_kref);
-	ceph_mdsc_release_dir_caps(req);
+	ceph_mdsc_release_dir_caps(req, false);
 	destroy_reply_info(&req->r_reply_info);
 	if (req->r_request)
 		ceph_msg_put(req->r_request);
@@ -3391,14 +3391,20 @@ static void handle_session(struct ceph_mds_session *session,
 	return;
 }
 
-void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
+void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req,
+				bool skip_checking_caps)
 {
 	int dcaps;
 
 	dcaps = xchg(&req->r_dir_caps, 0);
 	if (dcaps) {
 		dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
-		ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
+		if (skip_checking_caps)
+			ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
+							dcaps);
+		else
+			ceph_async_put_cap_refs(ceph_inode(req->r_parent),
+						dcaps);
 	}
 }
 
@@ -3434,7 +3440,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 		if (req->r_session->s_mds != session->s_mds)
 			continue;
 
-		ceph_mdsc_release_dir_caps(req);
+		ceph_mdsc_release_dir_caps(req, true);
 
 		__send_request(mdsc, session, req, true);
 	}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 43111e4..73ee022 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -506,7 +506,8 @@ extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
 extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
 				struct inode *dir,
 				struct ceph_mds_request *req);
-extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
+extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req,
+				       bool skip_checking_caps);
 static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
 {
 	kref_get(&req->r_kref);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ece94fc..5be5652 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1098,6 +1098,8 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
 				bool snap_rwsem_locked);
 extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
+					    int had);
 extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_async_put_cap_refs_work(struct work_struct *work);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
  2020-05-27  1:42 ` [PATCH v3 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
@ 2020-05-27  2:16   ` Yan, Zheng
  2020-05-27  6:38     ` Xiubo Li
  0 siblings, 1 reply; 5+ messages in thread
From: Yan, Zheng @ 2020-05-27  2:16 UTC (permalink / raw)
  To: xiubli, jlayton, idryomov; +Cc: pdonnell, ceph-devel

On 5/27/20 9:42 AM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> In the ceph_check_caps() it may call the session->s_mutex lock/unlock
> and in ceph_flush_snaps() it may call the mdsc->mutex lock/unlock. And
> both of them called from ceph_mdsc_put_request(), which may under the
> session or mdsc mutex lock/unlock pair already, we will hit the dead
> lock or double lock issue.
> 
> There have some deadlock cases, like:
> handle_forward()

MDS should never forward an async request.

> ...
> mutex_lock(&mdsc->mutex)
> ...
> ceph_mdsc_put_request()
>    --> ceph_mdsc_release_request()
>      --> ceph_put_cap_request()
>        --> ceph_put_cap_refs()
>          --> ceph_check_caps()
> ...
> mutex_unlock(&mdsc->mutex)

In normal case, async dirop caps should be put by ceph_async_foo_cb()
This only happens when session gets closed and cleaning up async 
requests. calling ceph_put_cap_refs_no_check_caps() here should work.

> 
> And also there maybe has some double session lock cases, like:
> 
> send_mds_reconnect()
> ...
> mutex_lock(&session->s_mutex);
> ...
>    --> replay_unsafe_requests()
>      --> ceph_mdsc_release_dir_caps()
>        --> ceph_put_cap_refs()
>          --> ceph_check_caps()
> ...
> mutex_unlock(&session->s_mutex);

calling ceph_put_cap_refs_no_check_caps() here should work too.


> 
> This patch will just delays to call them in a queue work to avoid
> the dead lock and double lock issues.
> 

The issue happens only in a few rare cases. I don't think it's worth to 
call all ceph_check_caps() async.


Regards
Yan, Zheng

> URL: https://tracker.ceph.com/issues/45635
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   fs/ceph/caps.c       | 89 ++++++++++++++++++++++++++++++++++++++++++++--------
>   fs/ceph/inode.c      |  3 ++
>   fs/ceph/mds_client.c |  2 +-
>   fs/ceph/super.h      |  5 +++
>   4 files changed, 85 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 27c2e60..996bedb 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -9,6 +9,7 @@
>   #include <linux/wait.h>
>   #include <linux/writeback.h>
>   #include <linux/iversion.h>
> +#include <linux/bits.h>
>   
>   #include "super.h"
>   #include "mds_client.h"
> @@ -3007,19 +3008,15 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
>   	return 0;
>   }
>   
> -/*
> - * Release cap refs.
> - *
> - * If we released the last ref on any given cap, call ceph_check_caps
> - * to release (or schedule a release).
> - *
> - * If we are releasing a WR cap (from a sync write), finalize any affected
> - * cap_snap, and wake up any waiters.
> - */
> -void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
> +#define CAP_REF_LAST_BIT       0
> +#define CAP_REF_FLUSHSNAPS_BIT 1
> +#define CAP_REF_WAKE_BIT       2
> +
> +static int __ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
>   {
>   	struct inode *inode = &ci->vfs_inode;
>   	int last = 0, put = 0, flushsnaps = 0, wake = 0;
> +	unsigned long flags = 0;
>   
>   	spin_lock(&ci->i_ceph_lock);
>   	if (had & CEPH_CAP_PIN)
> @@ -3073,13 +3070,79 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
>   	     last ? " last" : "", put ? " put" : "");
>   
>   	if (last)
> -		ceph_check_caps(ci, 0, NULL);
> +		set_bit(CAP_REF_LAST_BIT, &flags);
>   	else if (flushsnaps)
> -		ceph_flush_snaps(ci, NULL);
> +		set_bit(CAP_REF_FLUSHSNAPS_BIT, &flags);
>   	if (wake)
> -		wake_up_all(&ci->i_cap_wq);
> +		set_bit(CAP_REF_WAKE_BIT, &flags);
>   	while (put-- > 0)
>   		iput(inode);
> +
> +	return flags;
> +}
> +
> +/*
> + * This is the bottow half of __ceph_put_cap_refs(), which
> + * may take the mdsc->mutex and session->s_mutex and this
> + * will be called in a queue work to void dead/double lock
> + * issues if called from ceph_mdsc_put_request().
> + */
> +static void __ceph_put_cap_refs_bh(struct ceph_inode_info *ci,
> +				   unsigned long flags)
> +{
> +	if (test_bit(CAP_REF_LAST_BIT, &flags))
> +		ceph_check_caps(ci, 0, NULL);
> +	else if (test_bit(CAP_REF_FLUSHSNAPS_BIT, &flags))
> +		ceph_flush_snaps(ci, NULL);
> +	if (test_bit(CAP_REF_WAKE_BIT, &flags))
> +		wake_up_all(&ci->i_cap_wq);
> +}
> +
> +/*
> + * Release cap refs.
> + *
> + * If we released the last ref on any given cap, call ceph_check_caps
> + * to release (or schedule a release).
> + *
> + * If we are releasing a WR cap (from a sync write), finalize any affected
> + * cap_snap, and wake up any waiters.
> + */
> +void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
> +{
> +	unsigned long flags;
> +
> +	flags = __ceph_put_cap_refs(ci, had);
> +	__ceph_put_cap_refs_bh(ci, flags);
> +}
> +
> +void ceph_async_put_cap_refs_work(struct work_struct *work)
> +{
> +	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
> +						  put_cap_refs_work);
> +	unsigned long flags;
> +
> +	spin_lock(&ci->i_ceph_lock);
> +	flags = xchg(&ci->pending_put_flags, 0);
> +	spin_unlock(&ci->i_ceph_lock);
> +	if (!flags)
> +		return;
> +
> +	__ceph_put_cap_refs_bh(ci, flags);
> +}
> +
> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
> +{
> +	struct inode *inode = &ci->vfs_inode;
> +	unsigned long flags;
> +
> +	flags = __ceph_put_cap_refs(ci, had);
> +
> +	spin_lock(&ci->i_ceph_lock);
> +	ci->pending_put_flags |= flags;
> +	spin_unlock(&ci->i_ceph_lock);
> +
> +	queue_work(ceph_inode_to_client(inode)->inode_wq,
> +		   &ci->put_cap_refs_work);
>   }
>   
>   /*
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 357c937..e0ea508 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>   	INIT_LIST_HEAD(&ci->i_snap_realm_item);
>   	INIT_LIST_HEAD(&ci->i_snap_flush_item);
>   
> +	INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
> +	ci->pending_put_flags = 0;
> +
>   	INIT_WORK(&ci->i_work, ceph_inode_work);
>   	ci->i_work_mask = 0;
>   	memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 0e0ab01..12506b7 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -3398,7 +3398,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
>   	dcaps = xchg(&req->r_dir_caps, 0);
>   	if (dcaps) {
>   		dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
> -		ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> +		ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>   	}
>   }
>   
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 226f19c..ece94fc 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -421,6 +421,9 @@ struct ceph_inode_info {
>   	struct timespec64 i_btime;
>   	struct timespec64 i_snap_btime;
>   
> +	struct work_struct put_cap_refs_work;
> +	unsigned long pending_put_flags;
> +
>   	struct work_struct i_work;
>   	unsigned long  i_work_mask;
>   
> @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
>   				bool snap_rwsem_locked);
>   extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
>   extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had);
> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
>   extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
>   				       struct ceph_snap_context *snapc);
>   extern void ceph_flush_snaps(struct ceph_inode_info *ci,
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
  2020-05-27  2:16   ` Yan, Zheng
@ 2020-05-27  6:38     ` Xiubo Li
  0 siblings, 0 replies; 5+ messages in thread
From: Xiubo Li @ 2020-05-27  6:38 UTC (permalink / raw)
  To: Yan, Zheng, jlayton, idryomov; +Cc: pdonnell, ceph-devel

On 2020/5/27 10:16, Yan, Zheng wrote:
> On 5/27/20 9:42 AM, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> In the ceph_check_caps() it may call the session->s_mutex lock/unlock
>> and in ceph_flush_snaps() it may call the mdsc->mutex lock/unlock. And
>> both of them called from ceph_mdsc_put_request(), which may under the
>> session or mdsc mutex lock/unlock pair already, we will hit the dead
>> lock or double lock issue.
>>
>> There have some deadlock cases, like:
>> handle_forward()
>
> MDS should never forward an async request.

Okay.


>
>> ...
>> mutex_lock(&mdsc->mutex)
>> ...
>> ceph_mdsc_put_request()
>>    --> ceph_mdsc_release_request()
>>      --> ceph_put_cap_request()
>>        --> ceph_put_cap_refs()
>>          --> ceph_check_caps()
>> ...
>> mutex_unlock(&mdsc->mutex)
>
> In normal case, async dirop caps should be put by ceph_async_foo_cb()
> This only happens when session gets closed and cleaning up async 
> requests. calling ceph_put_cap_refs_no_check_caps() here should work.
>
Checked it again, yeah, you are right. In normal case, the 
ceph_mdsc_release_dir_caps() in ceph_mdsc_release_request() shall never 
be called.

>>
>> And also there maybe has some double session lock cases, like:
>>
>> send_mds_reconnect()
>> ...
>> mutex_lock(&session->s_mutex);
>> ...
>>    --> replay_unsafe_requests()
>>      --> ceph_mdsc_release_dir_caps()
>>        --> ceph_put_cap_refs()
>>          --> ceph_check_caps()
>> ...
>> mutex_unlock(&session->s_mutex);
>
> calling ceph_put_cap_refs_no_check_caps() here should work too.
>
>
>>
>> This patch will just delays to call them in a queue work to avoid
>> the dead lock and double lock issues.
>>
>
> The issue happens only in a few rare cases. I don't think it's worth 
> to call all ceph_check_caps() async.
>
Will drop the async check caps stuff.

Thanks
BRs
Xiubo


>
> Regards
> Yan, Zheng
>
>> URL: https://tracker.ceph.com/issues/45635
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/caps.c       | 89 
>> ++++++++++++++++++++++++++++++++++++++++++++--------
>>   fs/ceph/inode.c      |  3 ++
>>   fs/ceph/mds_client.c |  2 +-
>>   fs/ceph/super.h      |  5 +++
>>   4 files changed, 85 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 27c2e60..996bedb 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -9,6 +9,7 @@
>>   #include <linux/wait.h>
>>   #include <linux/writeback.h>
>>   #include <linux/iversion.h>
>> +#include <linux/bits.h>
>>     #include "super.h"
>>   #include "mds_client.h"
>> @@ -3007,19 +3008,15 @@ static int ceph_try_drop_cap_snap(struct 
>> ceph_inode_info *ci,
>>       return 0;
>>   }
>>   -/*
>> - * Release cap refs.
>> - *
>> - * If we released the last ref on any given cap, call ceph_check_caps
>> - * to release (or schedule a release).
>> - *
>> - * If we are releasing a WR cap (from a sync write), finalize any 
>> affected
>> - * cap_snap, and wake up any waiters.
>> - */
>> -void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
>> +#define CAP_REF_LAST_BIT       0
>> +#define CAP_REF_FLUSHSNAPS_BIT 1
>> +#define CAP_REF_WAKE_BIT       2
>> +
>> +static int __ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
>>   {
>>       struct inode *inode = &ci->vfs_inode;
>>       int last = 0, put = 0, flushsnaps = 0, wake = 0;
>> +    unsigned long flags = 0;
>>         spin_lock(&ci->i_ceph_lock);
>>       if (had & CEPH_CAP_PIN)
>> @@ -3073,13 +3070,79 @@ void ceph_put_cap_refs(struct ceph_inode_info 
>> *ci, int had)
>>            last ? " last" : "", put ? " put" : "");
>>         if (last)
>> -        ceph_check_caps(ci, 0, NULL);
>> +        set_bit(CAP_REF_LAST_BIT, &flags);
>>       else if (flushsnaps)
>> -        ceph_flush_snaps(ci, NULL);
>> +        set_bit(CAP_REF_FLUSHSNAPS_BIT, &flags);
>>       if (wake)
>> -        wake_up_all(&ci->i_cap_wq);
>> +        set_bit(CAP_REF_WAKE_BIT, &flags);
>>       while (put-- > 0)
>>           iput(inode);
>> +
>> +    return flags;
>> +}
>> +
>> +/*
>> + * This is the bottow half of __ceph_put_cap_refs(), which
>> + * may take the mdsc->mutex and session->s_mutex and this
>> + * will be called in a queue work to void dead/double lock
>> + * issues if called from ceph_mdsc_put_request().
>> + */
>> +static void __ceph_put_cap_refs_bh(struct ceph_inode_info *ci,
>> +                   unsigned long flags)
>> +{
>> +    if (test_bit(CAP_REF_LAST_BIT, &flags))
>> +        ceph_check_caps(ci, 0, NULL);
>> +    else if (test_bit(CAP_REF_FLUSHSNAPS_BIT, &flags))
>> +        ceph_flush_snaps(ci, NULL);
>> +    if (test_bit(CAP_REF_WAKE_BIT, &flags))
>> +        wake_up_all(&ci->i_cap_wq);
>> +}
>> +
>> +/*
>> + * Release cap refs.
>> + *
>> + * If we released the last ref on any given cap, call ceph_check_caps
>> + * to release (or schedule a release).
>> + *
>> + * If we are releasing a WR cap (from a sync write), finalize any 
>> affected
>> + * cap_snap, and wake up any waiters.
>> + */
>> +void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
>> +{
>> +    unsigned long flags;
>> +
>> +    flags = __ceph_put_cap_refs(ci, had);
>> +    __ceph_put_cap_refs_bh(ci, flags);
>> +}
>> +
>> +void ceph_async_put_cap_refs_work(struct work_struct *work)
>> +{
>> +    struct ceph_inode_info *ci = container_of(work, struct 
>> ceph_inode_info,
>> +                          put_cap_refs_work);
>> +    unsigned long flags;
>> +
>> +    spin_lock(&ci->i_ceph_lock);
>> +    flags = xchg(&ci->pending_put_flags, 0);
>> +    spin_unlock(&ci->i_ceph_lock);
>> +    if (!flags)
>> +        return;
>> +
>> +    __ceph_put_cap_refs_bh(ci, flags);
>> +}
>> +
>> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
>> +{
>> +    struct inode *inode = &ci->vfs_inode;
>> +    unsigned long flags;
>> +
>> +    flags = __ceph_put_cap_refs(ci, had);
>> +
>> +    spin_lock(&ci->i_ceph_lock);
>> +    ci->pending_put_flags |= flags;
>> +    spin_unlock(&ci->i_ceph_lock);
>> +
>> +    queue_work(ceph_inode_to_client(inode)->inode_wq,
>> +           &ci->put_cap_refs_work);
>>   }
>>     /*
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index 357c937..e0ea508 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block 
>> *sb)
>>       INIT_LIST_HEAD(&ci->i_snap_realm_item);
>>       INIT_LIST_HEAD(&ci->i_snap_flush_item);
>>   +    INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
>> +    ci->pending_put_flags = 0;
>> +
>>       INIT_WORK(&ci->i_work, ceph_inode_work);
>>       ci->i_work_mask = 0;
>>       memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 0e0ab01..12506b7 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -3398,7 +3398,7 @@ void ceph_mdsc_release_dir_caps(struct 
>> ceph_mds_request *req)
>>       dcaps = xchg(&req->r_dir_caps, 0);
>>       if (dcaps) {
>>           dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
>> -        ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>> +        ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>>       }
>>   }
>>   diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 226f19c..ece94fc 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -421,6 +421,9 @@ struct ceph_inode_info {
>>       struct timespec64 i_btime;
>>       struct timespec64 i_snap_btime;
>>   +    struct work_struct put_cap_refs_work;
>> +    unsigned long pending_put_flags;
>> +
>>       struct work_struct i_work;
>>       unsigned long  i_work_mask;
>>   @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct 
>> ceph_inode_info *ci, int caps,
>>                   bool snap_rwsem_locked);
>>   extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
>>   extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
>> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int 
>> had);
>> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
>>   extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, 
>> int nr,
>>                          struct ceph_snap_context *snapc);
>>   extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>>
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-05-27  6:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-27  1:42 [PATCH v3 0/2] ceph: fix dead lock and double lock xiubli
2020-05-27  1:42 ` [PATCH v3 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
2020-05-27  2:16   ` Yan, Zheng
2020-05-27  6:38     ` Xiubo Li
2020-05-27  1:42 ` [PATCH v3 2/2] ceph: do not check the caps when reconnecting to mds xiubli

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.