* [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap()
@ 2013-09-22 4:46 Yan, Zheng
2013-09-22 4:46 ` [PATCH 2/3] ceph: set caps count after composing cap reconnect message Yan, Zheng
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Yan, Zheng @ 2013-09-22 4:46 UTC (permalink / raw)
To: ceph-devel; +Cc: sage, Yan, Zheng
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Call __queue_cap_release() in __ceph_remove_cap(); this avoids
acquiring s_cap_lock twice.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
fs/ceph/caps.c | 21 +++++++++++----------
fs/ceph/mds_client.c | 6 ++----
fs/ceph/super.h | 8 +-------
3 files changed, 14 insertions(+), 21 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c3..d2d6e40 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
* caller should hold i_ceph_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
/* remove from session list */
spin_lock(&session->s_cap_lock);
+ if (queue_release)
+ __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+ cap->mseq, cap->issue_seq);
+
if (session->s_cap_iterator == cap) {
/* not yet, we are iterating over this very cap */
dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1023,7 +1027,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
struct ceph_mds_cap_release *head;
struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
BUG_ON(!session->s_num_cap_releases);
msg = list_first_entry(&session->s_cap_releases,
struct ceph_msg, list_head);
@@ -1052,7 +1055,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
(int)CEPH_CAPS_PER_RELEASE,
(int)msg->front.iov_len);
}
- spin_unlock(&session->s_cap_lock);
}
/*
@@ -1067,12 +1069,8 @@ void ceph_queue_caps_release(struct inode *inode)
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
- struct ceph_mds_session *session = cap->session;
-
- __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
- cap->mseq, cap->issue_seq);
p = rb_next(p);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, true);
}
}
@@ -2791,7 +2789,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
}
spin_unlock(&mdsc->cap_dirty_lock);
}
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, false);
}
/* else, we already released it */
@@ -2931,9 +2929,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
- if (op == CEPH_CAP_OP_IMPORT)
+ if (op == CEPH_CAP_OP_IMPORT) {
+ spin_lock(&session->s_cap_lock);
__queue_cap_release(session, vino.ino, cap_id,
mseq, seq);
+ spin_unlock(&session->s_cap_lock);
+ }
goto flush_cap_releases;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f51ab26..8f8f5c0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -986,7 +986,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, false);
if (!__ceph_is_any_real_caps(ci)) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1231,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
session->s_trim_caps--;
if (oissued) {
/* we aren't the only cap.. just remove us */
- __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
- cap->mseq, cap->issue_seq);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, true);
} else {
/* try to drop referring dentries */
spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a538b51..8de94b5 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -742,13 +742,7 @@ extern int ceph_add_cap(struct inode *inode,
int fmode, unsigned issued, unsigned wanted,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
- spin_lock(&cap->ci->i_ceph_lock);
- __ceph_remove_cap(cap);
- spin_unlock(&cap->ci->i_ceph_lock);
-}
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
--
1.8.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] ceph: set caps count after composing cap reconnect message
2013-09-22 4:46 [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Yan, Zheng
@ 2013-09-22 4:46 ` Yan, Zheng
2013-09-22 4:46 ` [PATCH 3/3] ceph: handle race between cap reconnect and cap release Yan, Zheng
2013-09-23 3:48 ` [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Sage Weil
2 siblings, 0 replies; 7+ messages in thread
From: Yan, Zheng @ 2013-09-22 4:46 UTC (permalink / raw)
To: ceph-devel; +Cc: sage, Yan, Zheng
From: "Yan, Zheng" <zheng.z.yan@intel.com>
It's possible that some caps get released while composing the cap
reconnect message.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
fs/ceph/mds_client.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8f8f5c0..6b4ed35 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
*/
struct ceph_reconnect_state {
+ int nr_caps;
struct ceph_pagelist *pagelist;
bool flock;
};
@@ -2549,6 +2550,8 @@ encode_again:
} else {
err = ceph_pagelist_append(pagelist, &rec, reclen);
}
+
+ recon_state->nr_caps++;
out_free:
kfree(path);
out_dput:
@@ -2576,6 +2579,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
struct rb_node *p;
int mds = session->s_mds;
int err = -ENOMEM;
+ int s_nr_caps;
struct ceph_pagelist *pagelist;
struct ceph_reconnect_state recon_state;
@@ -2611,10 +2615,12 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
discard_cap_releases(mdsc, session);
/* traverse this session's caps */
- err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
+ s_nr_caps = session->s_nr_caps;
+ err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
if (err)
goto fail;
+ recon_state.nr_caps = 0;
recon_state.pagelist = pagelist;
recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
err = iterate_session_caps(session, encode_caps_cb, &recon_state);
@@ -2643,11 +2649,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
if (recon_state.flock)
reply->hdr.version = cpu_to_le16(2);
- if (pagelist->length) {
- /* set up outbound data if we have any */
- reply->hdr.data_len = cpu_to_le32(pagelist->length);
- ceph_msg_data_add_pagelist(reply, pagelist);
- }
+
+ /* raced with cap release? */
+ if (s_nr_caps != recon_state.nr_caps) {
+ struct page *page = list_first_entry(&pagelist->head,
+ struct page, lru);
+ __le32 *addr = kmap_atomic(page);
+ *addr = cpu_to_le32(recon_state.nr_caps);
+ kunmap_atomic(addr);
+ }
+
+ reply->hdr.data_len = cpu_to_le32(pagelist->length);
+ ceph_msg_data_add_pagelist(reply, pagelist);
ceph_con_send(&session->s_con, reply);
mutex_unlock(&session->s_mutex);
--
1.8.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] ceph: handle race between cap reconnect and cap release
2013-09-22 4:46 [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Yan, Zheng
2013-09-22 4:46 ` [PATCH 2/3] ceph: set caps count after composing cap reconnect message Yan, Zheng
@ 2013-09-22 4:46 ` Yan, Zheng
2013-09-23 14:31 ` Sage Weil
2013-09-23 3:48 ` [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Sage Weil
2 siblings, 1 reply; 7+ messages in thread
From: Yan, Zheng @ 2013-09-22 4:46 UTC (permalink / raw)
To: ceph-devel; +Cc: sage, Yan, Zheng
From: "Yan, Zheng" <zheng.z.yan@intel.com>
When a cap gets released while composing the cap reconnect message,
we should skip queuing the release message if the cap hasn't been
added to the cap reconnect message.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
fs/ceph/caps.c | 4 +++-
fs/ceph/mds_client.c | 21 ++++++++++++++++++---
fs/ceph/mds_client.h | 1 +
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d2d6e40..acc8564 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -909,7 +909,9 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
/* remove from session list */
spin_lock(&session->s_cap_lock);
- if (queue_release)
+ if (queue_release &&
+ (!session->s_cap_reconnect ||
+ cap->cap_gen == session->s_cap_gen))
__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
cap->mseq, cap->issue_seq);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6b4ed35..30397a6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
INIT_LIST_HEAD(&s->s_waiting);
INIT_LIST_HEAD(&s->s_unsafe);
s->s_num_cap_releases = 0;
+ s->s_cap_reconnect = 0;
s->s_cap_iterator = NULL;
INIT_LIST_HEAD(&s->s_cap_releases);
INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
unsigned num;
dout("discard_cap_releases mds%d\n", session->s_mds);
- spin_lock(&session->s_cap_lock);
/* zero out the in-progress message */
msg = list_first_entry(&session->s_cap_releases,
@@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
msg->front.iov_len = sizeof(*head);
list_add(&msg->list_head, &session->s_cap_releases);
}
-
- spin_unlock(&session->s_cap_lock);
}
/*
@@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
cap->mseq = 0; /* and migrate_seq */
+ cap->cap_gen = cap->session->s_cap_gen;
if (recon_state->flock) {
rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
dout("session %p state %s\n", session,
session_state_name(session->s_state));
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ spin_unlock(&session->s_gen_ttl_lock);
+
+ spin_lock(&session->s_cap_lock);
+ /*
+ * notify __ceph_remove_cap() that we are composing cap reconnect.
+ * If a cap get released before being added to the cap reconnect,
+ * __ceph_remove_cap() should skip queuing cap release.
+ */
+ session->s_cap_reconnect = 1;
/* drop old cap expires; we're about to reestablish that state */
discard_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
/* traverse this session's caps */
s_nr_caps = session->s_nr_caps;
@@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
if (err < 0)
goto fail;
+ spin_lock(&session->s_cap_lock);
+ session->s_cap_reconnect = 0;
+ spin_unlock(&session->s_cap_lock);
+
/*
* snaprealms. we provide mds with the ino, seq (version), and
* parent for all of our realms. If the mds has any newer info,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fb..4c053d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
struct list_head s_caps; /* all caps issued by this session */
int s_nr_caps, s_trim_caps;
int s_num_cap_releases;
+ int s_cap_reconnect;
struct list_head s_cap_releases; /* waiting cap_release messages */
struct list_head s_cap_releases_done; /* ready to send */
struct ceph_cap *s_cap_iterator;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap()
2013-09-22 4:46 [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Yan, Zheng
2013-09-22 4:46 ` [PATCH 2/3] ceph: set caps count after composing cap reconnect message Yan, Zheng
2013-09-22 4:46 ` [PATCH 3/3] ceph: handle race between cap reconnect and cap release Yan, Zheng
@ 2013-09-23 3:48 ` Sage Weil
2 siblings, 0 replies; 7+ messages in thread
From: Sage Weil @ 2013-09-23 3:48 UTC (permalink / raw)
To: Yan, Zheng; +Cc: ceph-devel
Reviewed-by: Sage Weil <sage@inktank.com>
On Sun, 22 Sep 2013, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>
> call __queue_cap_release() in __ceph_remove_cap(), this avoids
> acquiring s_cap_lock twice.
>
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
> fs/ceph/caps.c | 21 +++++++++++----------
> fs/ceph/mds_client.c | 6 ++----
> fs/ceph/super.h | 8 +-------
> 3 files changed, 14 insertions(+), 21 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 13976c3..d2d6e40 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
> * caller should hold i_ceph_lock.
> * caller will not hold session s_mutex if called from destroy_inode.
> */
> -void __ceph_remove_cap(struct ceph_cap *cap)
> +void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
> {
> struct ceph_mds_session *session = cap->session;
> struct ceph_inode_info *ci = cap->ci;
> @@ -909,6 +909,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
>
> /* remove from session list */
> spin_lock(&session->s_cap_lock);
> + if (queue_release)
> + __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
> + cap->mseq, cap->issue_seq);
> +
> if (session->s_cap_iterator == cap) {
> /* not yet, we are iterating over this very cap */
> dout("__ceph_remove_cap delaying %p removal from session %p\n",
> @@ -1023,7 +1027,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
> struct ceph_mds_cap_release *head;
> struct ceph_mds_cap_item *item;
>
> - spin_lock(&session->s_cap_lock);
> BUG_ON(!session->s_num_cap_releases);
> msg = list_first_entry(&session->s_cap_releases,
> struct ceph_msg, list_head);
> @@ -1052,7 +1055,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
> (int)CEPH_CAPS_PER_RELEASE,
> (int)msg->front.iov_len);
> }
> - spin_unlock(&session->s_cap_lock);
> }
>
> /*
> @@ -1067,12 +1069,8 @@ void ceph_queue_caps_release(struct inode *inode)
> p = rb_first(&ci->i_caps);
> while (p) {
> struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
> - struct ceph_mds_session *session = cap->session;
> -
> - __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
> - cap->mseq, cap->issue_seq);
> p = rb_next(p);
> - __ceph_remove_cap(cap);
> + __ceph_remove_cap(cap, true);
> }
> }
>
> @@ -2791,7 +2789,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
> }
> spin_unlock(&mdsc->cap_dirty_lock);
> }
> - __ceph_remove_cap(cap);
> + __ceph_remove_cap(cap, false);
> }
> /* else, we already released it */
>
> @@ -2931,9 +2929,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
> if (!inode) {
> dout(" i don't have ino %llx\n", vino.ino);
>
> - if (op == CEPH_CAP_OP_IMPORT)
> + if (op == CEPH_CAP_OP_IMPORT) {
> + spin_lock(&session->s_cap_lock);
> __queue_cap_release(session, vino.ino, cap_id,
> mseq, seq);
> + spin_unlock(&session->s_cap_lock);
> + }
> goto flush_cap_releases;
> }
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index f51ab26..8f8f5c0 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -986,7 +986,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> dout("removing cap %p, ci is %p, inode is %p\n",
> cap, ci, &ci->vfs_inode);
> spin_lock(&ci->i_ceph_lock);
> - __ceph_remove_cap(cap);
> + __ceph_remove_cap(cap, false);
> if (!__ceph_is_any_real_caps(ci)) {
> struct ceph_mds_client *mdsc =
> ceph_sb_to_client(inode->i_sb)->mdsc;
> @@ -1231,9 +1231,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
> session->s_trim_caps--;
> if (oissued) {
> /* we aren't the only cap.. just remove us */
> - __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
> - cap->mseq, cap->issue_seq);
> - __ceph_remove_cap(cap);
> + __ceph_remove_cap(cap, true);
> } else {
> /* try to drop referring dentries */
> spin_unlock(&ci->i_ceph_lock);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index a538b51..8de94b5 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -742,13 +742,7 @@ extern int ceph_add_cap(struct inode *inode,
> int fmode, unsigned issued, unsigned wanted,
> unsigned cap, unsigned seq, u64 realmino, int flags,
> struct ceph_cap_reservation *caps_reservation);
> -extern void __ceph_remove_cap(struct ceph_cap *cap);
> -static inline void ceph_remove_cap(struct ceph_cap *cap)
> -{
> - spin_lock(&cap->ci->i_ceph_lock);
> - __ceph_remove_cap(cap);
> - spin_unlock(&cap->ci->i_ceph_lock);
> -}
> +extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
> extern void ceph_put_cap(struct ceph_mds_client *mdsc,
> struct ceph_cap *cap);
>
> --
> 1.8.1.4
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] ceph: handle race between cap reconnect and cap release
2013-09-22 4:46 ` [PATCH 3/3] ceph: handle race between cap reconnect and cap release Yan, Zheng
@ 2013-09-23 14:31 ` Sage Weil
2013-09-23 16:08 ` Yan, Zheng
0 siblings, 1 reply; 7+ messages in thread
From: Sage Weil @ 2013-09-23 14:31 UTC (permalink / raw)
To: Yan, Zheng; +Cc: ceph-devel
On Sun, 22 Sep 2013, Yan, Zheng wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>
> When a cap get released while composing the cap reconnect message.
> We should skip queuing the release message if the cap hasn't been
> added to the cap reconnect message.
>
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
> fs/ceph/caps.c | 4 +++-
> fs/ceph/mds_client.c | 21 ++++++++++++++++++---
> fs/ceph/mds_client.h | 1 +
> 3 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index d2d6e40..acc8564 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -909,7 +909,9 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>
> /* remove from session list */
> spin_lock(&session->s_cap_lock);
> - if (queue_release)
> + if (queue_release &&
> + (!session->s_cap_reconnect ||
> + cap->cap_gen == session->s_cap_gen))
The locking here is imprecise... s_cap_gen is protected by
s_gen_ttl_lock vs s_cap_lock for s_cap_reconnect. I think it does not matter,
but we should document why (s_gen_ttl_lock unlock includes a memory barrier;
s_cap_gen does not change while s_cap_lock is held?)...
> __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
> cap->mseq, cap->issue_seq);
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 6b4ed35..30397a6 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
> INIT_LIST_HEAD(&s->s_waiting);
> INIT_LIST_HEAD(&s->s_unsafe);
> s->s_num_cap_releases = 0;
> + s->s_cap_reconnect = 0;
> s->s_cap_iterator = NULL;
> INIT_LIST_HEAD(&s->s_cap_releases);
> INIT_LIST_HEAD(&s->s_cap_releases_done);
> @@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
> unsigned num;
>
> dout("discard_cap_releases mds%d\n", session->s_mds);
> - spin_lock(&session->s_cap_lock);
>
> /* zero out the in-progress message */
> msg = list_first_entry(&session->s_cap_releases,
> @@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
> msg->front.iov_len = sizeof(*head);
> list_add(&msg->list_head, &session->s_cap_releases);
> }
> -
> - spin_unlock(&session->s_cap_lock);
Why was the locking dropped here?
Thanks!
sage
> }
>
> /*
> @@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
> cap->seq = 0; /* reset cap seq */
> cap->issue_seq = 0; /* and issue_seq */
> cap->mseq = 0; /* and migrate_seq */
> + cap->cap_gen = cap->session->s_cap_gen;
>
> if (recon_state->flock) {
> rec.v2.cap_id = cpu_to_le64(cap->cap_id);
> @@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> dout("session %p state %s\n", session,
> session_state_name(session->s_state));
>
> + spin_lock(&session->s_gen_ttl_lock);
> + session->s_cap_gen++;
> + spin_unlock(&session->s_gen_ttl_lock);
> +
> + spin_lock(&session->s_cap_lock);
> + /*
> + * notify __ceph_remove_cap() that we are composing cap reconnect.
> + * If a cap get released before being added to the cap reconnect,
> + * __ceph_remove_cap() should skip queuing cap release.
> + */
> + session->s_cap_reconnect = 1;
> /* drop old cap expires; we're about to reestablish that state */
> discard_cap_releases(mdsc, session);
> + spin_unlock(&session->s_cap_lock);
>
> /* traverse this session's caps */
> s_nr_caps = session->s_nr_caps;
> @@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> if (err < 0)
> goto fail;
>
> + spin_lock(&session->s_cap_lock);
> + session->s_cap_reconnect = 0;
> + spin_unlock(&session->s_cap_lock);
> +
> /*
> * snaprealms. we provide mds with the ino, seq (version), and
> * parent for all of our realms. If the mds has any newer info,
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index c2a19fb..4c053d0 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -132,6 +132,7 @@ struct ceph_mds_session {
> struct list_head s_caps; /* all caps issued by this session */
> int s_nr_caps, s_trim_caps;
> int s_num_cap_releases;
> + int s_cap_reconnect;
> struct list_head s_cap_releases; /* waiting cap_release messages */
> struct list_head s_cap_releases_done; /* ready to send */
> struct ceph_cap *s_cap_iterator;
> --
> 1.8.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] ceph: handle race between cap reconnect and cap release
2013-09-23 14:31 ` Sage Weil
@ 2013-09-23 16:08 ` Yan, Zheng
2013-09-24 1:05 ` Yan, Zheng
0 siblings, 1 reply; 7+ messages in thread
From: Yan, Zheng @ 2013-09-23 16:08 UTC (permalink / raw)
To: Sage Weil; +Cc: Yan, Zheng, ceph-devel
On Mon, Sep 23, 2013 at 10:31 PM, Sage Weil <sage@inktank.com> wrote:
> On Sun, 22 Sep 2013, Yan, Zheng wrote:
>> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>>
>> When a cap get released while composing the cap reconnect message.
>> We should skip queuing the release message if the cap hasn't been
>> added to the cap reconnect message.
>>
>> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
>> ---
>> fs/ceph/caps.c | 4 +++-
>> fs/ceph/mds_client.c | 21 ++++++++++++++++++---
>> fs/ceph/mds_client.h | 1 +
>> 3 files changed, 22 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index d2d6e40..acc8564 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -909,7 +909,9 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
>>
>> /* remove from session list */
>> spin_lock(&session->s_cap_lock);
>> - if (queue_release)
>> + if (queue_release &&
>> + (!session->s_cap_reconnect ||
>> + cap->cap_gen == session->s_cap_gen))
>
> The locking here is imprecise... s_cap_reconnect is protected by
> s_cap_ttl_lock vs c_cap_lock for s_cap_gen. I think it does not matter,
> but we should document why (s_cap_ttl_lock unlock includes memory barrier;
> s_cap_gen does not change while s_cap_lock is held?)
s_cap_reconnect is protected by s_cap_lock, and no one should change
session->s_cap_gen
while the session is in the reconnect state. So I think the locking here
is race free. (The reason
I don't use "session->s_state == CEPH_MDS_SESSION_RECONNECTING" here is to avoid
writing code that relies on an implicit memory barrier.)
...
>
>> __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
>> cap->mseq, cap->issue_seq);
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 6b4ed35..30397a6 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>> INIT_LIST_HEAD(&s->s_waiting);
>> INIT_LIST_HEAD(&s->s_unsafe);
>> s->s_num_cap_releases = 0;
>> + s->s_cap_reconnect = 0;
>> s->s_cap_iterator = NULL;
>> INIT_LIST_HEAD(&s->s_cap_releases);
>> INIT_LIST_HEAD(&s->s_cap_releases_done);
>> @@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
>> unsigned num;
>>
>> dout("discard_cap_releases mds%d\n", session->s_mds);
>> - spin_lock(&session->s_cap_lock);
>>
>> /* zero out the in-progress message */
>> msg = list_first_entry(&session->s_cap_releases,
>> @@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
>> msg->front.iov_len = sizeof(*head);
>> list_add(&msg->list_head, &session->s_cap_releases);
>> }
>> -
>> - spin_unlock(&session->s_cap_lock);
>
> Why is the locking was dropped here?
>
it's moved into send_mds_reconnect()
Regards
Yan, Zheng
> Thanks!
> sage
>
>
>> }
>>
>> /*
>> @@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
>> cap->seq = 0; /* reset cap seq */
>> cap->issue_seq = 0; /* and issue_seq */
>> cap->mseq = 0; /* and migrate_seq */
>> + cap->cap_gen = cap->session->s_cap_gen;
>>
>> if (recon_state->flock) {
>> rec.v2.cap_id = cpu_to_le64(cap->cap_id);
>> @@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
>> dout("session %p state %s\n", session,
>> session_state_name(session->s_state));
>>
>> + spin_lock(&session->s_gen_ttl_lock);
>> + session->s_cap_gen++;
>> + spin_unlock(&session->s_gen_ttl_lock);
>> +
>> + spin_lock(&session->s_cap_lock);
>> + /*
>> + * notify __ceph_remove_cap() that we are composing cap reconnect.
>> + * If a cap get released before being added to the cap reconnect,
>> + * __ceph_remove_cap() should skip queuing cap release.
>> + */
>> + session->s_cap_reconnect = 1;
>> /* drop old cap expires; we're about to reestablish that state */
>> discard_cap_releases(mdsc, session);
>> + spin_unlock(&session->s_cap_lock);
>>
>> /* traverse this session's caps */
>> s_nr_caps = session->s_nr_caps;
>> @@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
>> if (err < 0)
>> goto fail;
>>
>> + spin_lock(&session->s_cap_lock);
>> + session->s_cap_reconnect = 0;
>> + spin_unlock(&session->s_cap_lock);
>> +
>> /*
>> * snaprealms. we provide mds with the ino, seq (version), and
>> * parent for all of our realms. If the mds has any newer info,
>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>> index c2a19fb..4c053d0 100644
>> --- a/fs/ceph/mds_client.h
>> +++ b/fs/ceph/mds_client.h
>> @@ -132,6 +132,7 @@ struct ceph_mds_session {
>> struct list_head s_caps; /* all caps issued by this session */
>> int s_nr_caps, s_trim_caps;
>> int s_num_cap_releases;
>> + int s_cap_reconnect;
>> struct list_head s_cap_releases; /* waiting cap_release messages */
>> struct list_head s_cap_releases_done; /* ready to send */
>> struct ceph_cap *s_cap_iterator;
>> --
>> 1.8.1.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
>>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] ceph: handle race between cap reconnect and cap release
2013-09-23 16:08 ` Yan, Zheng
@ 2013-09-24 1:05 ` Yan, Zheng
0 siblings, 0 replies; 7+ messages in thread
From: Yan, Zheng @ 2013-09-24 1:05 UTC (permalink / raw)
To: Sage Weil; +Cc: ceph-devel
Updated patch: added comments for the locking; no functional changes.
---
From d40facb3df480994ab6575a1ce90b645123b1f92 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Sun, 22 Sep 2013 11:08:14 +0800
Subject: [PATCH] ceph: handle race between cap reconnect and cap release
When a cap gets released while composing the cap reconnect message,
we should skip queuing the release message if the cap hasn't been
added to the cap reconnect message.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
fs/ceph/caps.c | 8 +++++++-
fs/ceph/mds_client.c | 21 ++++++++++++++++++---
fs/ceph/mds_client.h | 1 +
3 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d2d6e40..3c0a4bd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -909,7 +909,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
/* remove from session list */
spin_lock(&session->s_cap_lock);
- if (queue_release)
+ /*
+ * s_cap_reconnect is protected by s_cap_lock. no one changes
+ * s_cap_gen while session is in the reconnect state.
+ */
+ if (queue_release &&
+ (!session->s_cap_reconnect ||
+ cap->cap_gen == session->s_cap_gen))
__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
cap->mseq, cap->issue_seq);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6b4ed35..30397a6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -444,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
INIT_LIST_HEAD(&s->s_waiting);
INIT_LIST_HEAD(&s->s_unsafe);
s->s_num_cap_releases = 0;
+ s->s_cap_reconnect = 0;
s->s_cap_iterator = NULL;
INIT_LIST_HEAD(&s->s_cap_releases);
INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -1415,7 +1416,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
unsigned num;
dout("discard_cap_releases mds%d\n", session->s_mds);
- spin_lock(&session->s_cap_lock);
/* zero out the in-progress message */
msg = list_first_entry(&session->s_cap_releases,
@@ -1442,8 +1442,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
msg->front.iov_len = sizeof(*head);
list_add(&msg->list_head, &session->s_cap_releases);
}
-
- spin_unlock(&session->s_cap_lock);
}
/*
@@ -2488,6 +2486,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
cap->mseq = 0; /* and migrate_seq */
+ cap->cap_gen = cap->session->s_cap_gen;
if (recon_state->flock) {
rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2611,8 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
dout("session %p state %s\n", session,
session_state_name(session->s_state));
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ spin_unlock(&session->s_gen_ttl_lock);
+
+ spin_lock(&session->s_cap_lock);
+ /*
+ * notify __ceph_remove_cap() that we are composing cap reconnect.
+ * If a cap get released before being added to the cap reconnect,
+ * __ceph_remove_cap() should skip queuing cap release.
+ */
+ session->s_cap_reconnect = 1;
/* drop old cap expires; we're about to reestablish that state */
discard_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
/* traverse this session's caps */
s_nr_caps = session->s_nr_caps;
@@ -2627,6 +2638,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
if (err < 0)
goto fail;
+ spin_lock(&session->s_cap_lock);
+ session->s_cap_reconnect = 0;
+ spin_unlock(&session->s_cap_lock);
+
/*
* snaprealms. we provide mds with the ino, seq (version), and
* parent for all of our realms. If the mds has any newer info,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fb..4c053d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
struct list_head s_caps; /* all caps issued by this session */
int s_nr_caps, s_trim_caps;
int s_num_cap_releases;
+ int s_cap_reconnect;
struct list_head s_cap_releases; /* waiting cap_release messages */
struct list_head s_cap_releases_done; /* ready to send */
struct ceph_cap *s_cap_iterator;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2013-09-24 1:05 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-22 4:46 [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Yan, Zheng
2013-09-22 4:46 ` [PATCH 2/3] ceph: set caps count after composing cap reconnect message Yan, Zheng
2013-09-22 4:46 ` [PATCH 3/3] ceph: handle race between cap reconnect and cap release Yan, Zheng
2013-09-23 14:31 ` Sage Weil
2013-09-23 16:08 ` Yan, Zheng
2013-09-24 1:05 ` Yan, Zheng
2013-09-23 3:48 ` [PATCH 1/3] ceph: queue cap release in __ceph_remove_cap() Sage Weil
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.