* [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap()
@ 2013-09-22  4:46 Yan, Zheng
  2013-09-22  4:46 ` [PATCH 2/3] ceph: set caps count after composing cap reconnect message Yan, Zheng
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Yan, Zheng @ 2013-09-22  4:46 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

Call __queue_cap_release() from __ceph_remove_cap(); this avoids
acquiring s_cap_lock twice.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
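As a rough illustration of the change (a user-space sketch assuming pthreads,
not the kernel code itself), callers used to take s_cap_lock twice, once
inside __queue_cap_release() and once inside __ceph_remove_cap(); now the
queueing happens in the same critical section:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t s_cap_lock = PTHREAD_MUTEX_INITIALIZER;

/* caller must hold s_cap_lock (like __queue_cap_release() after this patch) */
static void queue_cap_release_locked(long ino)
{
	printf("queue release for ino %ld\n", ino);
}

/* one lock acquisition covers both the optional queueing and the removal,
 * mirroring the new __ceph_remove_cap(cap, queue_release) signature */
static void remove_cap(long ino, bool queue_release)
{
	pthread_mutex_lock(&s_cap_lock);
	if (queue_release)
		queue_cap_release_locked(ino);
	/* ... drop the cap from the session list here ... */
	pthread_mutex_unlock(&s_cap_lock);
}

int main(void)
{
	remove_cap(1, true);   /* e.g. ceph_queue_caps_release(), trim_caps_cb() */
	remove_cap(2, false);  /* e.g. handle_cap_export(), remove_session_caps_cb() */
	return 0;
}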
 fs/ceph/caps.c       | 21 +++++++++++----------
 fs/ceph/mds_client.c |  6 ++----
 fs/ceph/super.h      |  8 +-------
 3 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c3..d2d6e40 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 {
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
+	if (queue_release)
+		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+				    cap->mseq, cap->issue_seq);
+
 	if (session->s_cap_iterator == cap) {
 		/* not yet, we are iterating over this very cap */
 		dout("__ceph_remove_cap  delaying %p removal from session %p\n",
@@ -1023,7 +1027,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
 	struct ceph_mds_cap_release *head;
 	struct ceph_mds_cap_item *item;
 
-	spin_lock(&session->s_cap_lock);
 	BUG_ON(!session->s_num_cap_releases);
 	msg = list_first_entry(&session->s_cap_releases,
 			       struct ceph_msg, list_head);
@@ -1052,7 +1055,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
 		     (int)CEPH_CAPS_PER_RELEASE,
 		     (int)msg->front.iov_len);
 	}
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1067,12 +1069,8 @@ void ceph_queue_caps_release(struct inode *inode)
 	p = rb_first(&ci->i_caps);
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
-		struct ceph_mds_session *session = cap->session;
-
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
 		p = rb_next(p);
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, true);
 	}
 }
 
@@ -2791,7 +2789,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			}
 			spin_unlock(&mdsc->cap_dirty_lock);
 		}
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, false);
 	}
 	/* else, we already released it */
 
@@ -2931,9 +2929,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	if (!inode) {
 		dout(" i don't have ino %llx\n", vino.ino);
 
-		if (op == CEPH_CAP_OP_IMPORT)
+		if (op == CEPH_CAP_OP_IMPORT) {
+			spin_lock(&session->s_cap_lock);
 			__queue_cap_release(session, vino.ino, cap_id,
 					    mseq, seq);
+			spin_unlock(&session->s_cap_lock);
+		}
 		goto flush_cap_releases;
 	}
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f51ab26..8f8f5c0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -986,7 +986,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
+	__ceph_remove_cap(cap, false);
 	if (!__ceph_is_any_real_caps(ci)) {
 		struct ceph_mds_client *mdsc =
 			ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1231,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 	session->s_trim_caps--;
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, true);
 	} else {
 		/* try to drop referring dentries */
 		spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a538b51..8de94b5 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -742,13 +742,7 @@ extern int ceph_add_cap(struct inode *inode,
 			int fmode, unsigned issued, unsigned wanted,
 			unsigned cap, unsigned seq, u64 realmino, int flags,
 			struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
-	spin_lock(&cap->ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
-	spin_unlock(&cap->ci->i_ceph_lock);
-}
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
 
-- 
1.8.1.4



* [PATCH 2/3] ceph: set caps count after composing cap reconnect message
  2013-09-22  4:46 [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Yan, Zheng
@ 2013-09-22  4:46 ` Yan, Zheng
  2013-09-22  4:46 ` [PATCH 3/3] ceph: handle race between cap reconnect and cap release Yan, Zheng
  2013-09-23  3:48 ` [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Sage Weil
  2 siblings, 0 replies; 7+ messages in thread
From: Yan, Zheng @ 2013-09-22  4:46 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

Some caps may get released while the cap reconnect message is being
composed.  Track how many caps are actually encoded and, if the count
written at the head of the message has become stale, fix it up before
sending.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
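The fix records how many caps are actually encoded and, if the count written
at the front of the pagelist has become stale, rewrites it in place.  A
self-contained user-space sketch of that "fix up the count afterwards"
approach (the buffer handling here is a hypothetical stand-in; the patch does
this with kmap_atomic() on the pagelist's first page and cpu_to_le32()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char buf[64];
	size_t off = 0;
	uint32_t planned = 5, encoded = 0, header;

	/* write the expected cap count up front, like
	 * ceph_pagelist_encode_32(pagelist, s_nr_caps) */
	memcpy(buf + off, &planned, sizeof(planned));
	off += sizeof(planned);

	/* encode the caps; pretend two were released while iterating */
	for (uint32_t i = 0; i < planned; i++) {
		if (i == 1 || i == 3)
			continue;              /* cap went away, skip it */
		buf[off++] = (unsigned char)i; /* stand-in for one cap record */
		encoded++;
	}

	/* raced with cap release?  patch the count at the head of the buffer */
	if (encoded != planned)
		memcpy(buf, &encoded, sizeof(encoded));

	memcpy(&header, buf, sizeof(header));
	printf("planned %u, encoded %u, header now says %u\n",
	       planned, encoded, header);
	return 0;
}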
 fs/ceph/mds_client.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8f8f5c0..6b4ed35 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
  */
 
 struct ceph_reconnect_state {
+	int nr_caps;
 	struct ceph_pagelist *pagelist;
 	bool flock;
 };
@@ -2549,6 +2550,8 @@ encode_again:
 	} else {
 		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
+
+	recon_state->nr_caps++;
 out_free:
 	kfree(path);
 out_dput:
@@ -2576,6 +2579,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	struct rb_node *p;
 	int mds = session->s_mds;
 	int err = -ENOMEM;
+	int s_nr_caps;
 	struct ceph_pagelist *pagelist;
 	struct ceph_reconnect_state recon_state;
 
@@ -2611,10 +2615,12 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	discard_cap_releases(mdsc, session);
 
 	/* traverse this session's caps */
-	err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
+	s_nr_caps = session->s_nr_caps;
+	err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
 	if (err)
 		goto fail;
 
+	recon_state.nr_caps = 0;
 	recon_state.pagelist = pagelist;
 	recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
 	err = iterate_session_caps(session, encode_caps_cb, &recon_state);
@@ -2643,11 +2649,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 
 	if (recon_state.flock)
 		reply->hdr.version = cpu_to_le16(2);
-	if (pagelist->length) {
-		/* set up outbound data if we have any */
-		reply->hdr.data_len = cpu_to_le32(pagelist->length);
-		ceph_msg_data_add_pagelist(reply, pagelist);
-	}
+
+	/* raced with cap release? */
+	if (s_nr_caps != recon_state.nr_caps) {
+		struct page *page = list_first_entry(&pagelist->head,
+						     struct page, lru);
+		__le32 *addr = kmap_atomic(page);
+		*addr = cpu_to_le32(recon_state.nr_caps);
+		kunmap_atomic(addr);
+	}
+		
+	reply->hdr.data_len = cpu_to_le32(pagelist->length);
+	ceph_msg_data_add_pagelist(reply, pagelist);
 	ceph_con_send(&session->s_con, reply);
 
 	mutex_unlock(&session->s_mutex);
-- 
1.8.1.4



* [PATCH 3/3] ceph: handle race between cap reconnect and cap release
  2013-09-22  4:46 [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Yan, Zheng
  2013-09-22  4:46 ` [PATCH 2/3] ceph: set caps count after composing cap reconnect message Yan, Zheng
@ 2013-09-22  4:46 ` Yan, Zheng
  2013-09-23 14:31   ` Sage Weil
  2013-09-23  3:48 ` [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Sage Weil
  2 siblings, 1 reply; 7+ messages in thread
From: Yan, Zheng @ 2013-09-22  4:46 UTC (permalink / raw)
  To: ceph-devel; +Cc: sage, Yan, Zheng

From: "Yan, Zheng" <zheng.z.yan@intel.com>

When a cap gets released while the cap reconnect message is being
composed, we should skip queuing the release message if the cap hasn't
been added to the cap reconnect message.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
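A small user-space model of the check this patch adds (the struct fields are
stand-ins mirroring ceph_mds_session and ceph_cap, not the real definitions):
a release is queued only when no reconnect is being composed, or when the cap
was already encoded into the reconnect message, i.e. its cap_gen matches the
session generation bumped at the start of send_mds_reconnect():

#include <stdbool.h>
#include <stdio.h>

struct session { int s_cap_gen; int s_cap_reconnect; };
struct cap     { int cap_gen; };

/* mirrors the condition added to __ceph_remove_cap() */
static bool should_queue_release(const struct session *s, const struct cap *c)
{
	return !s->s_cap_reconnect || c->cap_gen == s->s_cap_gen;
}

int main(void)
{
	struct session s = { .s_cap_gen = 1, .s_cap_reconnect = 0 };
	struct cap c = { .cap_gen = 1 };

	printf("normal release:           %d\n", should_queue_release(&s, &c));

	/* send_mds_reconnect() starts: bump the generation, set the flag */
	s.s_cap_gen++;
	s.s_cap_reconnect = 1;

	/* cap released before encode_caps_cb() saw it: stale gen, skip release */
	printf("released before encoding: %d\n", should_queue_release(&s, &c));

	/* cap was encoded first (encode_caps_cb() copied the new generation) */
	c.cap_gen = s.s_cap_gen;
	printf("released after encoding:  %d\n", should_queue_release(&s, &c));
	return 0;
}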
 fs/ceph/caps.c       |  4 +++-
 fs/ceph/mds_client.c | 21 ++++++++++++++++++---
 fs/ceph/mds_client.h |  1 +
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d2d6e40..acc8564 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -909,7 +909,9 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
-	if (queue_release)
+	if (queue_release &&
+	    (!session->s_cap_reconnect ||
+	     cap->cap_gen == session->s_cap_gen))
 		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
 				    cap->mseq, cap->issue_seq);
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6b4ed35..30397a6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
 	s->s_num_cap_releases = 0;
+	s->s_cap_reconnect = 0;
 	s->s_cap_iterator = NULL;
 	INIT_LIST_HEAD(&s->s_cap_releases);
 	INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 	unsigned num;
 
 	dout("discard_cap_releases mds%d\n", session->s_mds);
-	spin_lock(&session->s_cap_lock);
 
 	/* zero out the in-progress message */
 	msg = list_first_entry(&session->s_cap_releases,
@@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 		msg->front.iov_len = sizeof(*head);
 		list_add(&msg->list_head, &session->s_cap_releases);
 	}
-
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	cap->seq = 0;        /* reset cap seq */
 	cap->issue_seq = 0;  /* and issue_seq */
 	cap->mseq = 0;       /* and migrate_seq */
+	cap->cap_gen = cap->session->s_cap_gen;
 
 	if (recon_state->flock) {
 		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	dout("session %p state %s\n", session,
 	     session_state_name(session->s_state));
 
+	spin_lock(&session->s_gen_ttl_lock);
+	session->s_cap_gen++;
+	spin_unlock(&session->s_gen_ttl_lock);
+
+	spin_lock(&session->s_cap_lock);
+	/*
+	 * notify __ceph_remove_cap() that we are composing cap reconnect.
+	 * If a cap get released before being added to the cap reconnect,
+	 * __ceph_remove_cap() should skip queuing cap release.
+	 */
+	session->s_cap_reconnect = 1;
 	/* drop old cap expires; we're about to reestablish that state */
 	discard_cap_releases(mdsc, session);
+	spin_unlock(&session->s_cap_lock);
 
 	/* traverse this session's caps */
 	s_nr_caps = session->s_nr_caps;
@@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	if (err < 0)
 		goto fail;
 
+	spin_lock(&session->s_cap_lock);
+	session->s_cap_reconnect = 0;
+	spin_unlock(&session->s_cap_lock);
+
 	/*
 	 * snaprealms.  we provide mds with the ino, seq (version), and
 	 * parent for all of our realms.  If the mds has any newer info,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fb..4c053d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
 	struct list_head  s_caps;     /* all caps issued by this session */
 	int               s_nr_caps, s_trim_caps;
 	int               s_num_cap_releases;
+	int		  s_cap_reconnect;
 	struct list_head  s_cap_releases; /* waiting cap_release messages */
 	struct list_head  s_cap_releases_done; /* ready to send */
 	struct ceph_cap  *s_cap_iterator;
-- 
1.8.1.4



* Re: [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap()
  2013-09-22  4:46 [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Yan, Zheng
  2013-09-22  4:46 ` [PATCH 2/3] ceph: set caps count after composing cap reconnect message Yan, Zheng
  2013-09-22  4:46 ` [PATCH 3/3] ceph: handle race between cap reconnect and cap release Yan, Zheng
@ 2013-09-23  3:48 ` Sage Weil
  2 siblings, 0 replies; 7+ messages in thread
From: Sage Weil @ 2013-09-23  3:48 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel

Reviewed-by: Sage Weil <sage@inktank.com>

On Sun, 22 Sep 2013, Yan, Zheng wrote:

> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> call __queue_cap_release() in __ceph_remove_cap(), this avoids
> acquiring s_cap_lock twice.
> 
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  fs/ceph/caps.c       | 21 +++++++++++----------
>  fs/ceph/mds_client.c |  6 ++----
>  fs/ceph/super.h      |  8 +-------
>  3 files changed, 14 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 13976c3..d2d6e40 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
>   * caller should hold i_ceph_lock.
>   * caller will not hold session s_mutex if called from destroy_inode.
>   */
> -void __ceph_remove_cap(struct ceph_cap *cap)
> +void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>  {
>  	struct ceph_mds_session *session = cap->session;
>  	struct ceph_inode_info *ci = cap->ci;
> @@ -909,6 +909,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
>  
>  	/* remove from session list */
>  	spin_lock(&session->s_cap_lock);
> +	if (queue_release)
> +		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
> +				    cap->mseq, cap->issue_seq);
> +
>  	if (session->s_cap_iterator == cap) {
>  		/* not yet, we are iterating over this very cap */
>  		dout("__ceph_remove_cap  delaying %p removal from session %p\n",
> @@ -1023,7 +1027,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
>  	struct ceph_mds_cap_release *head;
>  	struct ceph_mds_cap_item *item;
>  
> -	spin_lock(&session->s_cap_lock);
>  	BUG_ON(!session->s_num_cap_releases);
>  	msg = list_first_entry(&session->s_cap_releases,
>  			       struct ceph_msg, list_head);
> @@ -1052,7 +1055,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
>  		     (int)CEPH_CAPS_PER_RELEASE,
>  		     (int)msg->front.iov_len);
>  	}
> -	spin_unlock(&session->s_cap_lock);
>  }
>  
>  /*
> @@ -1067,12 +1069,8 @@ void ceph_queue_caps_release(struct inode *inode)
>  	p = rb_first(&ci->i_caps);
>  	while (p) {
>  		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
> -		struct ceph_mds_session *session = cap->session;
> -
> -		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
> -				    cap->mseq, cap->issue_seq);
>  		p = rb_next(p);
> -		__ceph_remove_cap(cap);
> +		__ceph_remove_cap(cap, true);
>  	}
>  }
>  
> @@ -2791,7 +2789,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>  			}
>  			spin_unlock(&mdsc->cap_dirty_lock);
>  		}
> -		__ceph_remove_cap(cap);
> +		__ceph_remove_cap(cap, false);
>  	}
>  	/* else, we already released it */
>  
> @@ -2931,9 +2929,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
>  	if (!inode) {
>  		dout(" i don't have ino %llx\n", vino.ino);
>  
> -		if (op == CEPH_CAP_OP_IMPORT)
> +		if (op == CEPH_CAP_OP_IMPORT) {
> +			spin_lock(&session->s_cap_lock);
>  			__queue_cap_release(session, vino.ino, cap_id,
>  					    mseq, seq);
> +			spin_unlock(&session->s_cap_lock);
> +		}
>  		goto flush_cap_releases;
>  	}
>  
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index f51ab26..8f8f5c0 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -986,7 +986,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	dout("removing cap %p, ci is %p, inode is %p\n",
>  	     cap, ci, &ci->vfs_inode);
>  	spin_lock(&ci->i_ceph_lock);
> -	__ceph_remove_cap(cap);
> +	__ceph_remove_cap(cap, false);
>  	if (!__ceph_is_any_real_caps(ci)) {
>  		struct ceph_mds_client *mdsc =
>  			ceph_sb_to_client(inode->i_sb)->mdsc;
> @@ -1231,9 +1231,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>  	session->s_trim_caps--;
>  	if (oissued) {
>  		/* we aren't the only cap.. just remove us */
> -		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
> -				    cap->mseq, cap->issue_seq);
> -		__ceph_remove_cap(cap);
> +		__ceph_remove_cap(cap, true);
>  	} else {
>  		/* try to drop referring dentries */
>  		spin_unlock(&ci->i_ceph_lock);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index a538b51..8de94b5 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -742,13 +742,7 @@ extern int ceph_add_cap(struct inode *inode,
>  			int fmode, unsigned issued, unsigned wanted,
>  			unsigned cap, unsigned seq, u64 realmino, int flags,
>  			struct ceph_cap_reservation *caps_reservation);
> -extern void __ceph_remove_cap(struct ceph_cap *cap);
> -static inline void ceph_remove_cap(struct ceph_cap *cap)
> -{
> -	spin_lock(&cap->ci->i_ceph_lock);
> -	__ceph_remove_cap(cap);
> -	spin_unlock(&cap->ci->i_ceph_lock);
> -}
> +extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
>  extern void ceph_put_cap(struct ceph_mds_client *mdsc,
>  			 struct ceph_cap *cap);
>  
> -- 
> 1.8.1.4
> 
> 


* Re: [PATCH 3/3] ceph: handle race between cap reconnect and cap release
  2013-09-22  4:46 ` [PATCH 3/3] ceph: handle race between cap reconnect and cap release Yan, Zheng
@ 2013-09-23 14:31   ` Sage Weil
  2013-09-23 16:08     ` Yan, Zheng
  0 siblings, 1 reply; 7+ messages in thread
From: Sage Weil @ 2013-09-23 14:31 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel

On Sun, 22 Sep 2013, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
> 
> When a cap get released while composing the cap reconnect message.
> We should skip queuing the release message if the cap hasn't been
> added to the cap reconnect message.
> 
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  fs/ceph/caps.c       |  4 +++-
>  fs/ceph/mds_client.c | 21 ++++++++++++++++++---
>  fs/ceph/mds_client.h |  1 +
>  3 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index d2d6e40..acc8564 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -909,7 +909,9 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>  
>  	/* remove from session list */
>  	spin_lock(&session->s_cap_lock);
> -	if (queue_release)
> +	if (queue_release &&
> +	    (!session->s_cap_reconnect ||
> +	     cap->cap_gen == session->s_cap_gen))

The locking here is imprecise... s_cap_reconnect is protected by 
s_cap_lock, while s_gen_ttl_lock protects s_cap_gen.  I think it does not 
matter, but we should document why (the s_gen_ttl_lock unlock includes a 
memory barrier; s_cap_gen does not change while s_cap_lock is held?)...

>  		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
>  				    cap->mseq, cap->issue_seq);
>  
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 6b4ed35..30397a6 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>  	INIT_LIST_HEAD(&s->s_waiting);
>  	INIT_LIST_HEAD(&s->s_unsafe);
>  	s->s_num_cap_releases = 0;
> +	s->s_cap_reconnect = 0;
>  	s->s_cap_iterator = NULL;
>  	INIT_LIST_HEAD(&s->s_cap_releases);
>  	INIT_LIST_HEAD(&s->s_cap_releases_done);
> @@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
>  	unsigned num;
>  
>  	dout("discard_cap_releases mds%d\n", session->s_mds);
> -	spin_lock(&session->s_cap_lock);
>  
>  	/* zero out the in-progress message */
>  	msg = list_first_entry(&session->s_cap_releases,
> @@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
>  		msg->front.iov_len = sizeof(*head);
>  		list_add(&msg->list_head, &session->s_cap_releases);
>  	}
> -
> -	spin_unlock(&session->s_cap_lock);

Why was the locking dropped here?

Thanks!
sage


>  }
>  
>  /*
> @@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	cap->seq = 0;        /* reset cap seq */
>  	cap->issue_seq = 0;  /* and issue_seq */
>  	cap->mseq = 0;       /* and migrate_seq */
> +	cap->cap_gen = cap->session->s_cap_gen;
>  
>  	if (recon_state->flock) {
>  		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
> @@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
>  	dout("session %p state %s\n", session,
>  	     session_state_name(session->s_state));
>  
> +	spin_lock(&session->s_gen_ttl_lock);
> +	session->s_cap_gen++;
> +	spin_unlock(&session->s_gen_ttl_lock);
> +
> +	spin_lock(&session->s_cap_lock);
> +	/*
> +	 * notify __ceph_remove_cap() that we are composing cap reconnect.
> +	 * If a cap get released before being added to the cap reconnect,
> +	 * __ceph_remove_cap() should skip queuing cap release.
> +	 */
> +	session->s_cap_reconnect = 1;
>  	/* drop old cap expires; we're about to reestablish that state */
>  	discard_cap_releases(mdsc, session);
> +	spin_unlock(&session->s_cap_lock);
>  
>  	/* traverse this session's caps */
>  	s_nr_caps = session->s_nr_caps;
> @@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
>  	if (err < 0)
>  		goto fail;
>  
> +	spin_lock(&session->s_cap_lock);
> +	session->s_cap_reconnect = 0;
> +	spin_unlock(&session->s_cap_lock);
> +
>  	/*
>  	 * snaprealms.  we provide mds with the ino, seq (version), and
>  	 * parent for all of our realms.  If the mds has any newer info,
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index c2a19fb..4c053d0 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -132,6 +132,7 @@ struct ceph_mds_session {
>  	struct list_head  s_caps;     /* all caps issued by this session */
>  	int               s_nr_caps, s_trim_caps;
>  	int               s_num_cap_releases;
> +	int		  s_cap_reconnect;
>  	struct list_head  s_cap_releases; /* waiting cap_release messages */
>  	struct list_head  s_cap_releases_done; /* ready to send */
>  	struct ceph_cap  *s_cap_iterator;
> -- 
> 1.8.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 


* Re: [PATCH 3/3] ceph: handle race between cap reconnect and cap release
  2013-09-23 14:31   ` Sage Weil
@ 2013-09-23 16:08     ` Yan, Zheng
  2013-09-24  1:05       ` Yan, Zheng
  0 siblings, 1 reply; 7+ messages in thread
From: Yan, Zheng @ 2013-09-23 16:08 UTC (permalink / raw)
  To: Sage Weil; +Cc: Yan, Zheng, ceph-devel

On Mon, Sep 23, 2013 at 10:31 PM, Sage Weil <sage@inktank.com> wrote:
> On Sun, 22 Sep 2013, Yan, Zheng wrote:
>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>
>> When a cap get released while composing the cap reconnect message.
>> We should skip queuing the release message if the cap hasn't been
>> added to the cap reconnect message.
>>
>> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
>> ---
>>  fs/ceph/caps.c       |  4 +++-
>>  fs/ceph/mds_client.c | 21 ++++++++++++++++++---
>>  fs/ceph/mds_client.h |  1 +
>>  3 files changed, 22 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index d2d6e40..acc8564 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -909,7 +909,9 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>>
>>       /* remove from session list */
>>       spin_lock(&session->s_cap_lock);
>> -     if (queue_release)
>> +     if (queue_release &&
>> +         (!session->s_cap_reconnect ||
>> +          cap->cap_gen == session->s_cap_gen))
>
> The locking here is imprecise... s_cap_reconnect is protected by
> s_cap_ttl_lock vs c_cap_lock for s_cap_gen.  I think it does not matter,
> but we should document why (s_cap_ttl_lock unlock includes memory barrier;
> s_cap_gen does not change while s_cap_lock is held?)

s_cap_reconnect is protected by s_cap_lock, and no one should change
session->s_cap_gen while the session is in the reconnect state.  So I think
the locking here is race free.  (The reason I don't use
"session->s_state == CEPH_MDS_SESSION_RECONNECTING" here is to avoid writing
code that relies on an implicit memory barrier.)

...
>
>>               __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
>>                                   cap->mseq, cap->issue_seq);
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 6b4ed35..30397a6 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>>       INIT_LIST_HEAD(&s->s_waiting);
>>       INIT_LIST_HEAD(&s->s_unsafe);
>>       s->s_num_cap_releases = 0;
>> +     s->s_cap_reconnect = 0;
>>       s->s_cap_iterator = NULL;
>>       INIT_LIST_HEAD(&s->s_cap_releases);
>>       INIT_LIST_HEAD(&s->s_cap_releases_done);
>> @@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
>>       unsigned num;
>>
>>       dout("discard_cap_releases mds%d\n", session->s_mds);
>> -     spin_lock(&session->s_cap_lock);
>>
>>       /* zero out the in-progress message */
>>       msg = list_first_entry(&session->s_cap_releases,
>> @@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
>>               msg->front.iov_len = sizeof(*head);
>>               list_add(&msg->list_head, &session->s_cap_releases);
>>       }
>> -
>> -     spin_unlock(&session->s_cap_lock);
>
> Why is the locking was dropped here?
>

it's moved into send_mds_reconnect()

Regards
Yan, Zheng


> Thanks!
> sage
>
>
>>  }
>>
>>  /*
>> @@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>       cap->seq = 0;        /* reset cap seq */
>>       cap->issue_seq = 0;  /* and issue_seq */
>>       cap->mseq = 0;       /* and migrate_seq */
>> +     cap->cap_gen = cap->session->s_cap_gen;
>>
>>       if (recon_state->flock) {
>>               rec.v2.cap_id = cpu_to_le64(cap->cap_id);
>> @@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
>>       dout("session %p state %s\n", session,
>>            session_state_name(session->s_state));
>>
>> +     spin_lock(&session->s_gen_ttl_lock);
>> +     session->s_cap_gen++;
>> +     spin_unlock(&session->s_gen_ttl_lock);
>> +
>> +     spin_lock(&session->s_cap_lock);
>> +     /*
>> +      * notify __ceph_remove_cap() that we are composing cap reconnect.
>> +      * If a cap get released before being added to the cap reconnect,
>> +      * __ceph_remove_cap() should skip queuing cap release.
>> +      */
>> +     session->s_cap_reconnect = 1;
>>       /* drop old cap expires; we're about to reestablish that state */
>>       discard_cap_releases(mdsc, session);
>> +     spin_unlock(&session->s_cap_lock);
>>
>>       /* traverse this session's caps */
>>       s_nr_caps = session->s_nr_caps;
>> @@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
>>       if (err < 0)
>>               goto fail;
>>
>> +     spin_lock(&session->s_cap_lock);
>> +     session->s_cap_reconnect = 0;
>> +     spin_unlock(&session->s_cap_lock);
>> +
>>       /*
>>        * snaprealms.  we provide mds with the ino, seq (version), and
>>        * parent for all of our realms.  If the mds has any newer info,
>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>> index c2a19fb..4c053d0 100644
>> --- a/fs/ceph/mds_client.h
>> +++ b/fs/ceph/mds_client.h
>> @@ -132,6 +132,7 @@ struct ceph_mds_session {
>>       struct list_head  s_caps;     /* all caps issued by this session */
>>       int               s_nr_caps, s_trim_caps;
>>       int               s_num_cap_releases;
>> +     int               s_cap_reconnect;
>>       struct list_head  s_cap_releases; /* waiting cap_release messages */
>>       struct list_head  s_cap_releases_done; /* ready to send */
>>       struct ceph_cap  *s_cap_iterator;
>> --
>> 1.8.1.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


* Re: [PATCH 3/3] ceph: handle race between cap reconnect and cap release
  2013-09-23 16:08     ` Yan, Zheng
@ 2013-09-24  1:05       ` Yan, Zheng
  0 siblings, 0 replies; 7+ messages in thread
From: Yan, Zheng @ 2013-09-24  1:05 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel

Updated patch: added comments for the locking, no functional changes.

---
From d40facb3df480994ab6575a1ce90b645123b1f92 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Sun, 22 Sep 2013 11:08:14 +0800
Subject: [PATCH] ceph: handle race between cap reconnect and cap release

When a cap gets released while the cap reconnect message is being
composed, we should skip queuing the release message if the cap hasn't
been added to the cap reconnect message.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c       |  8 +++++++-
 fs/ceph/mds_client.c | 21 ++++++++++++++++++---
 fs/ceph/mds_client.h |  1 +
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d2d6e40..3c0a4bd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -909,7 +909,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
-	if (queue_release)
+	/*
+	 * s_cap_reconnect is protected by s_cap_lock. no one changes
+	 * s_cap_gen while session is in the reconnect state.
+	 */
+	if (queue_release &&
+	    (!session->s_cap_reconnect ||
+	     cap->cap_gen == session->s_cap_gen))
 		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
 				    cap->mseq, cap->issue_seq);
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6b4ed35..30397a6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
 	s->s_num_cap_releases = 0;
+	s->s_cap_reconnect = 0;
 	s->s_cap_iterator = NULL;
 	INIT_LIST_HEAD(&s->s_cap_releases);
 	INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 	unsigned num;
 
 	dout("discard_cap_releases mds%d\n", session->s_mds);
-	spin_lock(&session->s_cap_lock);
 
 	/* zero out the in-progress message */
 	msg = list_first_entry(&session->s_cap_releases,
@@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 		msg->front.iov_len = sizeof(*head);
 		list_add(&msg->list_head, &session->s_cap_releases);
 	}
-
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	cap->seq = 0;        /* reset cap seq */
 	cap->issue_seq = 0;  /* and issue_seq */
 	cap->mseq = 0;       /* and migrate_seq */
+	cap->cap_gen = cap->session->s_cap_gen;
 
 	if (recon_state->flock) {
 		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	dout("session %p state %s\n", session,
 	     session_state_name(session->s_state));
 
+	spin_lock(&session->s_gen_ttl_lock);
+	session->s_cap_gen++;
+	spin_unlock(&session->s_gen_ttl_lock);
+
+	spin_lock(&session->s_cap_lock);
+	/*
+	 * notify __ceph_remove_cap() that we are composing cap reconnect.
+	 * If a cap get released before being added to the cap reconnect,
+	 * __ceph_remove_cap() should skip queuing cap release.
+	 */
+	session->s_cap_reconnect = 1;
 	/* drop old cap expires; we're about to reestablish that state */
 	discard_cap_releases(mdsc, session);
+	spin_unlock(&session->s_cap_lock);
 
 	/* traverse this session's caps */
 	s_nr_caps = session->s_nr_caps;
@@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	if (err < 0)
 		goto fail;
 
+	spin_lock(&session->s_cap_lock);
+	session->s_cap_reconnect = 0;
+	spin_unlock(&session->s_cap_lock);
+
 	/*
 	 * snaprealms.  we provide mds with the ino, seq (version), and
 	 * parent for all of our realms.  If the mds has any newer info,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fb..4c053d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
 	struct list_head  s_caps;     /* all caps issued by this session */
 	int               s_nr_caps, s_trim_caps;
 	int               s_num_cap_releases;
+	int		  s_cap_reconnect;
 	struct list_head  s_cap_releases; /* waiting cap_release messages */
 	struct list_head  s_cap_releases_done; /* ready to send */
 	struct ceph_cap  *s_cap_iterator;
-- 
1.8.1.4


