* [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps()
@ 2017-04-05  1:30 Yan, Zheng
  2017-04-05  1:30 ` [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds Yan, Zheng
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Yan, Zheng @ 2017-04-05  1:30 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
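
For reference, a small illustration of why the old check was wrong
(illustrative snippet, not part of the patch): '!' binds tighter than
'==', so the old expression is true exactly when some write cap IS
wanted, which is the opposite of the intended "no write caps wanted"
test.

	int wanted = CEPH_CAP_ANY_WR;
	/* old: (!(wanted & CEPH_CAP_ANY_WR)) == 0  evaluates to 1 (passes) */
	int old_check = (!(wanted & CEPH_CAP_ANY_WR) == 0);
	/* new: !(wanted & CEPH_CAP_ANY_WR)  evaluates to 0 (fails) */
	int new_check = !(wanted & CEPH_CAP_ANY_WR);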

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 579a16c..0480492 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -123,7 +123,7 @@ int ceph_renew_caps(struct inode *inode)
 	spin_lock(&ci->i_ceph_lock);
 	wanted = __ceph_caps_file_wanted(ci);
 	if (__ceph_is_any_real_caps(ci) &&
-	    (!(wanted & CEPH_CAP_ANY_WR) == 0 || ci->i_auth_cap)) {
+	    (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
 		int issued = __ceph_caps_issued(ci, NULL);
 		spin_unlock(&ci->i_ceph_lock);
 		dout("renew caps %p want %s issued %s updating mds_wanted\n",
-- 
2.9.3



* [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds
  2017-04-05  1:30 [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Yan, Zheng
@ 2017-04-05  1:30 ` Yan, Zheng
  2017-04-05  4:39   ` Patrick Donnelly
  2017-04-05 22:47   ` Luis Henriques
  2017-04-05  1:30 ` [PATCH 3/5] ceph: fix potential use-after-free Yan, Zheng
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 11+ messages in thread
From: Yan, Zheng @ 2017-04-05  1:30 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

mdsmap::m_max_mds is the expected count of active mds; it is not the
maximum rank of an active mds. The user can decrease mdsmap::m_max_mds,
but that does not stop the mds daemons whose rank >= mdsmap::m_max_mds.
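
For example, an admin can lower max_mds from 4 to 2 while ranks 0-3 are
still active, and the client must still be able to address rank 3. A
rough sketch of the resulting rule, using a hypothetical helper that is
not part of this patch (m_num_mds is the number of rank slots present
in the decoded map, which may exceed m_max_mds):

	/* Illustrative only, not part of the patch. */
	static bool mds_rank_known(struct ceph_mdsmap *m, int mds)
	{
		return mds >= 0 && mds < m->m_num_mds &&
		       m->m_info[mds].state > 0;
	}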

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/debugfs.c           | 23 +++++++++++------------
 fs/ceph/mds_client.c        | 10 +++++-----
 fs/ceph/mdsmap.c            | 44 +++++++++++++++++++++++++++++++++++++-------
 include/linux/ceph/mdsmap.h |  7 ++++---
 4 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index f2ae393..1ff62ff 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -22,20 +22,19 @@ static int mdsmap_show(struct seq_file *s, void *p)
 {
 	int i;
 	struct ceph_fs_client *fsc = s->private;
+	struct ceph_mdsmap *mdsmap = fsc->mdsc->mdsmap;
 
-	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
+	if (fsc->mdsc == NULL || mdsmap == NULL)
 		return 0;
-	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
-	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
-	seq_printf(s, "session_timeout %d\n",
-		       fsc->mdsc->mdsmap->m_session_timeout);
-	seq_printf(s, "session_autoclose %d\n",
-		       fsc->mdsc->mdsmap->m_session_autoclose);
-	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
-		struct ceph_entity_addr *addr =
-			&fsc->mdsc->mdsmap->m_info[i].addr;
-		int state = fsc->mdsc->mdsmap->m_info[i].state;
-
+	seq_printf(s, "epoch %d\n", mdsmap->m_epoch);
+	seq_printf(s, "root %d\n", mdsmap->m_root);
+	seq_printf(s, "root %d\n", mdsmap->m_root);
+	seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
+	seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
+	seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
+	for (i = 0; i < mdsmap->m_num_mds; i++) {
+		struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
+		int state = mdsmap->m_info[i].state;
 		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
 			       ceph_pr_addr(&addr->in_addr),
 			       ceph_mds_state_name(state));
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 820bf0f..163f0d3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -441,7 +441,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 {
 	struct ceph_mds_session *s;
 
-	if (mds >= mdsc->mdsmap->m_max_mds)
+	if (mds >= mdsc->mdsmap->m_num_mds)
 		return ERR_PTR(-EINVAL);
 
 	s = kzalloc(sizeof(*s), GFP_NOFS);
@@ -1004,7 +1004,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
 	struct ceph_mds_session *ts;
 	int i, mds = session->s_mds;
 
-	if (mds >= mdsc->mdsmap->m_max_mds)
+	if (mds >= mdsc->mdsmap->m_num_mds)
 		return;
 
 	mi = &mdsc->mdsmap->m_info[mds];
@@ -3127,7 +3127,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 	dout("check_new_map new %u old %u\n",
 	     newmap->m_epoch, oldmap->m_epoch);
 
-	for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
 		if (mdsc->sessions[i] == NULL)
 			continue;
 		s = mdsc->sessions[i];
@@ -3141,7 +3141,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		     ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
 		     ceph_session_state_name(s->s_state));
 
-		if (i >= newmap->m_max_mds ||
+		if (i >= newmap->m_num_mds ||
 		    memcmp(ceph_mdsmap_get_addr(oldmap, i),
 			   ceph_mdsmap_get_addr(newmap, i),
 			   sizeof(struct ceph_entity_addr))) {
@@ -3187,7 +3187,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 		}
 	}
 
-	for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
+	for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) {
 		s = mdsc->sessions[i];
 		if (!s)
 			continue;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 5454e23..1a748cf 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -22,11 +22,11 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	int i;
 
 	/* special case for one mds */
-	if (1 == m->m_max_mds && m->m_info[0].state > 0)
+	if (1 == m->m_num_mds && m->m_info[0].state > 0)
 		return 0;
 
 	/* count */
-	for (i = 0; i < m->m_max_mds; i++)
+	for (i = 0; i < m->m_num_mds; i++)
 		if (m->m_info[i].state > 0)
 			n++;
 	if (n == 0)
@@ -135,8 +135,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	m->m_session_autoclose = ceph_decode_32(p);
 	m->m_max_file_size = ceph_decode_64(p);
 	m->m_max_mds = ceph_decode_32(p);
+	m->m_num_mds = m->m_max_mds;
 
-	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
+	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
 	if (m->m_info == NULL)
 		goto nomem;
 
@@ -207,9 +208,20 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		     ceph_pr_addr(&addr.in_addr),
 		     ceph_mds_state_name(state));
 
-		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
+		if (mds < 0 || state <= 0)
 			continue;
 
+		if (mds >= m->m_num_mds) {
+			int new_num = max(mds + 1, m->m_num_mds * 2);
+			void *new_m_info = krealloc(m->m_info,
+						new_num * sizeof(*m->m_info),
+						GFP_NOFS | __GFP_ZERO);
+			if (!new_m_info)
+				goto nomem;
+			m->m_info = new_m_info;
+			m->m_num_mds = new_num;
+		}
+
 		info = &m->m_info[mds];
 		info->global_id = global_id;
 		info->state = state;
@@ -229,6 +241,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 			info->export_targets = NULL;
 		}
 	}
+	if (m->m_num_mds > m->m_max_mds) {
+		/* find max up mds */
+		for (i = m->m_num_mds; i >= m->m_max_mds; i--) {
+			if (i == 0 || m->m_info[i-1].state > 0)
+				break;
+		}
+		m->m_num_mds = i;
+	}
 
 	/* pg_pools */
 	ceph_decode_32_safe(p, end, n, bad);
@@ -270,12 +290,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 
 		for (i = 0; i < n; i++) {
 			s32 mds = ceph_decode_32(p);
-			if (mds >= 0 && mds < m->m_max_mds) {
+			if (mds >= 0 && mds < m->m_num_mds) {
 				if (m->m_info[mds].laggy)
 					num_laggy++;
 			}
 		}
 		m->m_num_laggy = num_laggy;
+
+		if (n > m->m_num_mds) {
+			void *new_m_info = krealloc(m->m_info,
+						    n * sizeof(*m->m_info),
+						    GFP_NOFS | __GFP_ZERO);
+			if (!new_m_info)
+				goto nomem;
+			m->m_info = new_m_info;
+		}
+		m->m_num_mds = n;
 	}
 
 	/* inc */
@@ -341,7 +371,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
 {
 	int i;
 
-	for (i = 0; i < m->m_max_mds; i++)
+	for (i = 0; i < m->m_num_mds; i++)
 		kfree(m->m_info[i].export_targets);
 	kfree(m->m_info);
 	kfree(m->m_data_pg_pools);
@@ -357,7 +387,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
 		return false;
 	if (m->m_num_laggy > 0)
 		return false;
-	for (i = 0; i < m->m_max_mds; i++) {
+	for (i = 0; i < m->m_num_mds; i++) {
 		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
 			nr_active++;
 	}
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 8ed5dc5..d5f783f 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -25,6 +25,7 @@ struct ceph_mdsmap {
 	u32 m_session_autoclose;        /* seconds */
 	u64 m_max_file_size;
 	u32 m_max_mds;                  /* size of m_addr, m_state arrays */
+	int m_num_mds;
 	struct ceph_mds_info *m_info;
 
 	/* which object pools file data can be stored in */
@@ -40,7 +41,7 @@ struct ceph_mdsmap {
 static inline struct ceph_entity_addr *
 ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
 {
-	if (w >= m->m_max_mds)
+	if (w >= m->m_num_mds)
 		return NULL;
 	return &m->m_info[w].addr;
 }
@@ -48,14 +49,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
 static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
 {
 	BUG_ON(w < 0);
-	if (w >= m->m_max_mds)
+	if (w >= m->m_num_mds)
 		return CEPH_MDS_STATE_DNE;
 	return m->m_info[w].state;
 }
 
 static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
 {
-	if (w >= 0 && w < m->m_max_mds)
+	if (w >= 0 && w < m->m_num_mds)
 		return m->m_info[w].laggy;
 	return false;
 }
-- 
2.9.3



* [PATCH 3/5] ceph: fix potential use-after-free
  2017-04-05  1:30 [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Yan, Zheng
  2017-04-05  1:30 ` [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds Yan, Zheng
@ 2017-04-05  1:30 ` Yan, Zheng
  2017-04-05 17:21   ` Jeff Layton
  2017-04-05  1:30 ` [PATCH 4/5] ceph: close stopped mds' session Yan, Zheng
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Yan, Zheng @ 2017-04-05  1:30 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

__unregister_session() frees the session if it drops the last
reference. We should grab an extra reference if we want to use the
session after __unregister_session().
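
A minimal sketch of the pattern this patch applies (same function names
as in the diff below; the surrounding caller is abbreviated):

	mutex_lock(&mdsc->mutex);
	get_session(session);                /* extra ref for local use */
	__unregister_session(mdsc, session); /* drops the registration's ref */
	mutex_unlock(&mdsc->mutex);

	/* ... session can still be dereferenced safely here ... */

	ceph_put_mds_session(session);       /* drop the local ref */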

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/mds_client.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 163f0d3..bf765a8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2658,8 +2658,10 @@ static void handle_session(struct ceph_mds_session *session,
 	seq = le64_to_cpu(h->seq);
 
 	mutex_lock(&mdsc->mutex);
-	if (op == CEPH_SESSION_CLOSE)
+	if (op == CEPH_SESSION_CLOSE) {
+		get_session(session);
 		__unregister_session(mdsc, session);
+	}
 	/* FIXME: this ttl calculation is generous */
 	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
 	mutex_unlock(&mdsc->mutex);
@@ -2748,6 +2750,8 @@ static void handle_session(struct ceph_mds_session *session,
 			kick_requests(mdsc, mds);
 		mutex_unlock(&mdsc->mutex);
 	}
+	if (op == CEPH_SESSION_CLOSE)
+		ceph_put_mds_session(session);
 	return;
 
 bad:
@@ -3148,8 +3152,10 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 			if (s->s_state == CEPH_MDS_SESSION_OPENING) {
 				/* the session never opened, just close it
 				 * out now */
-				__wake_requests(mdsc, &s->s_waiting);
+				get_session(s);
 				__unregister_session(mdsc, s);
+				__wake_requests(mdsc, &s->s_waiting);
+				ceph_put_mds_session(s);
 			} else {
 				/* just close it */
 				mutex_unlock(&mdsc->mutex);
-- 
2.9.3



* [PATCH 4/5] ceph: close stopped mds' session
  2017-04-05  1:30 [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Yan, Zheng
  2017-04-05  1:30 ` [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds Yan, Zheng
  2017-04-05  1:30 ` [PATCH 3/5] ceph: fix potential use-after-free Yan, Zheng
@ 2017-04-05  1:30 ` Yan, Zheng
  2017-04-05  1:30 ` [PATCH 5/5] ceph: make seeky readdir more efficient Yan, Zheng
  2017-04-05 14:16 ` [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Jeff Layton
  4 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2017-04-05  1:30 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

If an mds has stopped, close its session and clean up its session's
requests/caps. The process is similar to handling a SESSION_CLOSE
initiated by the mds.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/mds_client.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bf765a8..5584c98 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3156,6 +3156,22 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 				__unregister_session(mdsc, s);
 				__wake_requests(mdsc, &s->s_waiting);
 				ceph_put_mds_session(s);
+			} else if (i >= newmap->m_num_mds) {
+				/* force close session for stopped mds */
+				get_session(s);
+				__unregister_session(mdsc, s);
+				__wake_requests(mdsc, &s->s_waiting);
+				kick_requests(mdsc, i);
+				mutex_unlock(&mdsc->mutex);
+
+				mutex_lock(&s->s_mutex);
+				cleanup_session_requests(mdsc, s);
+				remove_session_caps(s);
+				mutex_unlock(&s->s_mutex);
+
+				ceph_put_mds_session(s);
+
+				mutex_lock(&mdsc->mutex);
 			} else {
 				/* just close it */
 				mutex_unlock(&mdsc->mutex);
-- 
2.9.3



* [PATCH 5/5] ceph: make seeky readdir more efficient
  2017-04-05  1:30 [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Yan, Zheng
                   ` (2 preceding siblings ...)
  2017-04-05  1:30 ` [PATCH 4/5] ceph: close stopped mds' session Yan, Zheng
@ 2017-04-05  1:30 ` Yan, Zheng
  2017-04-05 14:16 ` [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Jeff Layton
  4 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2017-04-05  1:30 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

The current cephfs client uses a string to indicate the start position
of readdir; the string is the last entry of the previous readdir reply.
This approach does not work for seeky readdir because we cannot easily
convert the new position to a string. For seeky readdir, the mds needs
to return dentries from the beginning, and the client keeps retrying if
the reply does not contain the dentry it wants.

In the current version of ceph, the mds sorts CDentry objects in its
cache in hash order. The client also uses the dentry hash to compose
the dir position. For seeky readdir, if the client passes the hash part
of the dir position to the mds, the mds can avoid replying with useless
dentries.
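
A rough sketch of the idea (illustrative helpers only; the real fs/ceph
fpos_* helpers may pack the bits differently): a hash-order dir
position keeps the dentry hash in its high bits, so a seeky readdir can
recover that hash and pass it to the mds via the new
readdir.offset_hash field, as the dir.c hunk below does with
fpos_hash(ctx->pos).

	/* Illustrative only, not the exact fs/ceph layout. */
	static inline loff_t example_make_fpos(u32 hash, u32 off)
	{
		return ((loff_t)hash << 32) | off;
	}

	static inline u32 example_fpos_hash(loff_t pos)
	{
		return (u32)(pos >> 32);
	}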

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/dir.c                |  4 ++++
 fs/ceph/inode.c              | 17 ++++++++++++-----
 fs/ceph/mds_client.c         |  1 +
 fs/ceph/mds_client.h         |  3 ++-
 include/linux/ceph/ceph_fs.h |  2 ++
 5 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e9ad50..ae61cdf 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -378,7 +378,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 				ceph_mdsc_put_request(req);
 				return -ENOMEM;
 			}
+		} else if (is_hash_order(ctx->pos)) {
+			req->r_args.readdir.offset_hash =
+				cpu_to_le32(fpos_hash(ctx->pos));
 		}
+
 		req->r_dir_release_cnt = fi->dir_release_count;
 		req->r_dir_ordered_cnt = fi->dir_ordered_count;
 		req->r_readdir_cache_idx = fi->readdir_cache_idx;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d449e1c..efee88c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
 		return readdir_prepopulate_inodes_only(req, session);
 
-	if (rinfo->hash_order && req->r_path2) {
-		last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
-					  req->r_path2, strlen(req->r_path2));
-		last_hash = ceph_frag_value(last_hash);
+	if (rinfo->hash_order) {
+		if (req->r_path2) {
+			last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+						  req->r_path2,
+						  strlen(req->r_path2));
+			last_hash = ceph_frag_value(last_hash);
+		} else if (rinfo->offset_hash) {
+			/* mds understands offset_hash */
+			WARN_ON_ONCE(req->r_readdir_offset != 2);
+			last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
+		}
 	}
 
 	if (rinfo->dir_dir &&
@@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	}
 
 	if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
-	    !(rinfo->hash_order && req->r_path2)) {
+	    !(rinfo->hash_order && last_hash)) {
 		/* note dir version at start of readdir so we can tell
 		 * if any dentries get dropped */
 		req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5584c98..f7bfc22 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end,
 		info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
 		info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
 		info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
+		info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
 	}
 	if (num == 0)
 		goto done;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 5416675..db57ae9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -83,9 +83,10 @@ struct ceph_mds_reply_info_parsed {
 			struct ceph_mds_reply_dirfrag *dir_dir;
 			size_t			      dir_buf_size;
 			int                           dir_nr;
-			bool			      dir_complete;
 			bool			      dir_end;
+			bool			      dir_complete;
 			bool			      hash_order;
+			bool			      offset_hash;
 			struct ceph_mds_reply_dir_entry  *dir_entries;
 		};
 
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index f4b2ee1..1787e4a 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -365,6 +365,7 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_READDIR_FRAG_END		(1<<0)
 #define CEPH_READDIR_FRAG_COMPLETE	(1<<8)
 #define CEPH_READDIR_HASH_ORDER		(1<<9)
+#define CEPH_READDIR_OFFSET_HASH	(1<<10)
 
 union ceph_mds_request_args {
 	struct {
@@ -384,6 +385,7 @@ union ceph_mds_request_args {
 		__le32 max_entries;          /* how many dentries to grab */
 		__le32 max_bytes;
 		__le16 flags;
+		__le32 offset_hash;
 	} __attribute__ ((packed)) readdir;
 	struct {
 		__le32 mode;
-- 
2.9.3



* Re: [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds
  2017-04-05  1:30 ` [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds Yan, Zheng
@ 2017-04-05  4:39   ` Patrick Donnelly
  2017-04-05 22:47   ` Luis Henriques
  1 sibling, 0 replies; 11+ messages in thread
From: Patrick Donnelly @ 2017-04-05  4:39 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: Ceph Development, Jeff Layton

On Tue, Apr 4, 2017 at 9:30 PM, Yan, Zheng <zyan@redhat.com> wrote:
> mdsmap::m_max_mds is the expected count of active mds; it is not the
> maximum rank of an active mds. The user can decrease mdsmap::m_max_mds,
> but that does not stop the mds daemons whose rank >= mdsmap::m_max_mds.

This is related to http://tracker.ceph.com/issues/17259

-- 
Patrick Donnelly


* Re: [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps()
  2017-04-05  1:30 [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Yan, Zheng
                   ` (3 preceding siblings ...)
  2017-04-05  1:30 ` [PATCH 5/5] ceph: make seeky readdir more efficient Yan, Zheng
@ 2017-04-05 14:16 ` Jeff Layton
  2017-04-06  0:04   ` Yan, Zheng
  4 siblings, 1 reply; 11+ messages in thread
From: Jeff Layton @ 2017-04-05 14:16 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel

On Wed, 2017-04-05 at 09:30 +0800, Yan, Zheng wrote:
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/file.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 579a16c..0480492 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -123,7 +123,7 @@ int ceph_renew_caps(struct inode *inode)
>  	spin_lock(&ci->i_ceph_lock);
>  	wanted = __ceph_caps_file_wanted(ci);
>  	if (__ceph_is_any_real_caps(ci) &&
> -	    (!(wanted & CEPH_CAP_ANY_WR) == 0 || ci->i_auth_cap)) {
> +	    (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
>  		int issued = __ceph_caps_issued(ci, NULL);
>  		spin_unlock(&ci->i_ceph_lock);
>  		dout("renew caps %p want %s issued %s updating mds_wanted\n",

That certainly looks more like what was intended, but I'm still a
little unclear on why we have so much special casing in all of this
caps handling.

Why do we skip ceph_check_caps if we want CEPH_CAP_ANY_WR?
-- 
Jeff Layton <jlayton@redhat.com>


* Re: [PATCH 3/5] ceph: fix potential use-after-free
  2017-04-05  1:30 ` [PATCH 3/5] ceph: fix potential use-after-free Yan, Zheng
@ 2017-04-05 17:21   ` Jeff Layton
  2017-04-05 23:59     ` Yan, Zheng
  0 siblings, 1 reply; 11+ messages in thread
From: Jeff Layton @ 2017-04-05 17:21 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel

On Wed, 2017-04-05 at 09:30 +0800, Yan, Zheng wrote:
> __unregister_session() frees the session if it drops the last
> reference. We should grab an extra reference if we want to use the
> session after __unregister_session().
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/mds_client.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 163f0d3..bf765a8 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2658,8 +2658,10 @@ static void handle_session(struct ceph_mds_session *session,
>  	seq = le64_to_cpu(h->seq);
>  
>  	mutex_lock(&mdsc->mutex);
> -	if (op == CEPH_SESSION_CLOSE)
> +	if (op == CEPH_SESSION_CLOSE) {
> +		get_session(session);
>  		__unregister_session(mdsc, session);
> +	}
>  	/* FIXME: this ttl calculation is generous */
>  	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
>  	mutex_unlock(&mdsc->mutex);
> @@ -2748,6 +2750,8 @@ static void handle_session(struct ceph_mds_session *session,
>  			kick_requests(mdsc, mds);
>  		mutex_unlock(&mdsc->mutex);
>  	}
> +	if (op == CEPH_SESSION_CLOSE)
> +		ceph_put_mds_session(session);
>  	return;
>  
>  bad:
> @@ -3148,8 +3152,10 @@ static void check_new_map(struct ceph_mds_client *mdsc,
>  			if (s->s_state == CEPH_MDS_SESSION_OPENING) {
>  				/* the session never opened, just close it
>  				 * out now */
> -				__wake_requests(mdsc, &s->s_waiting);
> +				get_session(s);
>  				__unregister_session(mdsc, s);
> +				__wake_requests(mdsc, &s->s_waiting);
> +				ceph_put_mds_session(s);

What about this last bit? Why do we need to __wake_requests after
__unregister_session here? If not for that change then you wouldn't
need to take the extra reference here, AFAICS.

>  			} else {
>  				/* just close it */
>  				mutex_unlock(&mdsc->mutex);

Reviewed-by: Jeff Layton <jlayton@redhat.com>


* Re: [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds
  2017-04-05  1:30 ` [PATCH 2/5] ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds Yan, Zheng
  2017-04-05  4:39   ` Patrick Donnelly
@ 2017-04-05 22:47   ` Luis Henriques
  1 sibling, 0 replies; 11+ messages in thread
From: Luis Henriques @ 2017-04-05 22:47 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, jlayton

On Wed, Apr 05, 2017 at 09:30:16AM +0800, Yan, Zheng wrote:
> mdsmap::m_max_mds is the expected count of active mds; it is not the
> maximum rank of an active mds. The user can decrease mdsmap::m_max_mds,
> but that does not stop the mds daemons whose rank >= mdsmap::m_max_mds.
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/debugfs.c           | 23 +++++++++++------------
>  fs/ceph/mds_client.c        | 10 +++++-----
>  fs/ceph/mdsmap.c            | 44 +++++++++++++++++++++++++++++++++++++-------
>  include/linux/ceph/mdsmap.h |  7 ++++---
>  4 files changed, 57 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index f2ae393..1ff62ff 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -22,20 +22,19 @@ static int mdsmap_show(struct seq_file *s, void *p)
>  {
>  	int i;
>  	struct ceph_fs_client *fsc = s->private;
> +	struct ceph_mdsmap *mdsmap = fsc->mdsc->mdsmap;
>  
> -	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
> +	if (fsc->mdsc == NULL || mdsmap == NULL)

Checking fsc->mdsc for NULL should be done before initializing mdsmap,
otherwise it's a bit late.
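
Something along these lines would avoid the early dereference (sketch
only, not tested):

	struct ceph_fs_client *fsc = s->private;
	struct ceph_mdsmap *mdsmap;

	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
		return 0;
	mdsmap = fsc->mdsc->mdsmap;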

>  		return 0;
> -	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
> -	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
> -	seq_printf(s, "session_timeout %d\n",
> -		       fsc->mdsc->mdsmap->m_session_timeout);
> -	seq_printf(s, "session_autoclose %d\n",
> -		       fsc->mdsc->mdsmap->m_session_autoclose);
> -	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
> -		struct ceph_entity_addr *addr =
> -			&fsc->mdsc->mdsmap->m_info[i].addr;
> -		int state = fsc->mdsc->mdsmap->m_info[i].state;
> -
> +	seq_printf(s, "epoch %d\n", mdsmap->m_epoch);
> +	seq_printf(s, "root %d\n", mdsmap->m_root);
> +	seq_printf(s, "root %d\n", mdsmap->m_root);

Duplicated line.

Cheers,
--
Luís


> +	seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
> +	seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
> +	seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
> +	for (i = 0; i < mdsmap->m_num_mds; i++) {
> +		struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
> +		int state = mdsmap->m_info[i].state;
>  		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
>  			       ceph_pr_addr(&addr->in_addr),
>  			       ceph_mds_state_name(state));
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 820bf0f..163f0d3 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -441,7 +441,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>  {
>  	struct ceph_mds_session *s;
>  
> -	if (mds >= mdsc->mdsmap->m_max_mds)
> +	if (mds >= mdsc->mdsmap->m_num_mds)
>  		return ERR_PTR(-EINVAL);
>  
>  	s = kzalloc(sizeof(*s), GFP_NOFS);
> @@ -1004,7 +1004,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
>  	struct ceph_mds_session *ts;
>  	int i, mds = session->s_mds;
>  
> -	if (mds >= mdsc->mdsmap->m_max_mds)
> +	if (mds >= mdsc->mdsmap->m_num_mds)
>  		return;
>  
>  	mi = &mdsc->mdsmap->m_info[mds];
> @@ -3127,7 +3127,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
>  	dout("check_new_map new %u old %u\n",
>  	     newmap->m_epoch, oldmap->m_epoch);
>  
> -	for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) {
> +	for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
>  		if (mdsc->sessions[i] == NULL)
>  			continue;
>  		s = mdsc->sessions[i];
> @@ -3141,7 +3141,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
>  		     ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
>  		     ceph_session_state_name(s->s_state));
>  
> -		if (i >= newmap->m_max_mds ||
> +		if (i >= newmap->m_num_mds ||
>  		    memcmp(ceph_mdsmap_get_addr(oldmap, i),
>  			   ceph_mdsmap_get_addr(newmap, i),
>  			   sizeof(struct ceph_entity_addr))) {
> @@ -3187,7 +3187,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
>  		}
>  	}
>  
> -	for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
> +	for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) {
>  		s = mdsc->sessions[i];
>  		if (!s)
>  			continue;
> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 5454e23..1a748cf 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -22,11 +22,11 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>  	int i;
>  
>  	/* special case for one mds */
> -	if (1 == m->m_max_mds && m->m_info[0].state > 0)
> +	if (1 == m->m_num_mds && m->m_info[0].state > 0)
>  		return 0;
>  
>  	/* count */
> -	for (i = 0; i < m->m_max_mds; i++)
> +	for (i = 0; i < m->m_num_mds; i++)
>  		if (m->m_info[i].state > 0)
>  			n++;
>  	if (n == 0)
> @@ -135,8 +135,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>  	m->m_session_autoclose = ceph_decode_32(p);
>  	m->m_max_file_size = ceph_decode_64(p);
>  	m->m_max_mds = ceph_decode_32(p);
> +	m->m_num_mds = m->m_max_mds;
>  
> -	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
> +	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
>  	if (m->m_info == NULL)
>  		goto nomem;
>  
> @@ -207,9 +208,20 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>  		     ceph_pr_addr(&addr.in_addr),
>  		     ceph_mds_state_name(state));
>  
> -		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
> +		if (mds < 0 || state <= 0)
>  			continue;
>  
> +		if (mds >= m->m_num_mds) {
> +			int new_num = max(mds + 1, m->m_num_mds * 2);
> +			void *new_m_info = krealloc(m->m_info,
> +						new_num * sizeof(*m->m_info),
> +						GFP_NOFS | __GFP_ZERO);
> +			if (!new_m_info)
> +				goto nomem;
> +			m->m_info = new_m_info;
> +			m->m_num_mds = new_num;
> +		}
> +
>  		info = &m->m_info[mds];
>  		info->global_id = global_id;
>  		info->state = state;
> @@ -229,6 +241,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>  			info->export_targets = NULL;
>  		}
>  	}
> +	if (m->m_num_mds > m->m_max_mds) {
> +		/* find max up mds */
> +		for (i = m->m_num_mds; i >= m->m_max_mds; i--) {
> +			if (i == 0 || m->m_info[i-1].state > 0)
> +				break;
> +		}
> +		m->m_num_mds = i;
> +	}
>  
>  	/* pg_pools */
>  	ceph_decode_32_safe(p, end, n, bad);
> @@ -270,12 +290,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>  
>  		for (i = 0; i < n; i++) {
>  			s32 mds = ceph_decode_32(p);
> -			if (mds >= 0 && mds < m->m_max_mds) {
> +			if (mds >= 0 && mds < m->m_num_mds) {
>  				if (m->m_info[mds].laggy)
>  					num_laggy++;
>  			}
>  		}
>  		m->m_num_laggy = num_laggy;
> +
> +		if (n > m->m_num_mds) {
> +			void *new_m_info = krealloc(m->m_info,
> +						    n * sizeof(*m->m_info),
> +						    GFP_NOFS | __GFP_ZERO);
> +			if (!new_m_info)
> +				goto nomem;
> +			m->m_info = new_m_info;
> +		}
> +		m->m_num_mds = n;
>  	}
>  
>  	/* inc */
> @@ -341,7 +371,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
>  {
>  	int i;
>  
> -	for (i = 0; i < m->m_max_mds; i++)
> +	for (i = 0; i < m->m_num_mds; i++)
>  		kfree(m->m_info[i].export_targets);
>  	kfree(m->m_info);
>  	kfree(m->m_data_pg_pools);
> @@ -357,7 +387,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
>  		return false;
>  	if (m->m_num_laggy > 0)
>  		return false;
> -	for (i = 0; i < m->m_max_mds; i++) {
> +	for (i = 0; i < m->m_num_mds; i++) {
>  		if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
>  			nr_active++;
>  	}
> diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
> index 8ed5dc5..d5f783f 100644
> --- a/include/linux/ceph/mdsmap.h
> +++ b/include/linux/ceph/mdsmap.h
> @@ -25,6 +25,7 @@ struct ceph_mdsmap {
>  	u32 m_session_autoclose;        /* seconds */
>  	u64 m_max_file_size;
>  	u32 m_max_mds;                  /* size of m_addr, m_state arrays */
> +	int m_num_mds;
>  	struct ceph_mds_info *m_info;
>  
>  	/* which object pools file data can be stored in */
> @@ -40,7 +41,7 @@ struct ceph_mdsmap {
>  static inline struct ceph_entity_addr *
>  ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
>  {
> -	if (w >= m->m_max_mds)
> +	if (w >= m->m_num_mds)
>  		return NULL;
>  	return &m->m_info[w].addr;
>  }
> @@ -48,14 +49,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
>  static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
>  {
>  	BUG_ON(w < 0);
> -	if (w >= m->m_max_mds)
> +	if (w >= m->m_num_mds)
>  		return CEPH_MDS_STATE_DNE;
>  	return m->m_info[w].state;
>  }
>  
>  static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
>  {
> -	if (w >= 0 && w < m->m_max_mds)
> +	if (w >= 0 && w < m->m_num_mds)
>  		return m->m_info[w].laggy;
>  	return false;
>  }
> -- 
> 2.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


* Re: [PATCH 3/5] ceph: fix potential use-after-free
  2017-04-05 17:21   ` Jeff Layton
@ 2017-04-05 23:59     ` Yan, Zheng
  0 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2017-04-05 23:59 UTC (permalink / raw)
  To: Jeff Layton; +Cc: ceph-devel


> On 6 Apr 2017, at 01:21, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Wed, 2017-04-05 at 09:30 +0800, Yan, Zheng wrote:
>> __unregister_session() frees the session if it drops the last
>> reference. We should grab an extra reference if we want to use the
>> session after __unregister_session().
>> 
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>> fs/ceph/mds_client.c | 10 ++++++++--
>> 1 file changed, 8 insertions(+), 2 deletions(-)
>> 
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 163f0d3..bf765a8 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -2658,8 +2658,10 @@ static void handle_session(struct ceph_mds_session *session,
>> 	seq = le64_to_cpu(h->seq);
>> 
>> 	mutex_lock(&mdsc->mutex);
>> -	if (op == CEPH_SESSION_CLOSE)
>> +	if (op == CEPH_SESSION_CLOSE) {
>> +		get_session(session);
>> 		__unregister_session(mdsc, session);
>> +	}
>> 	/* FIXME: this ttl calculation is generous */
>> 	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
>> 	mutex_unlock(&mdsc->mutex);
>> @@ -2748,6 +2750,8 @@ static void handle_session(struct ceph_mds_session *session,
>> 			kick_requests(mdsc, mds);
>> 		mutex_unlock(&mdsc->mutex);
>> 	}
>> +	if (op == CEPH_SESSION_CLOSE)
>> +		ceph_put_mds_session(session);
>> 	return;
>> 
>> bad:
>> @@ -3148,8 +3152,10 @@ static void check_new_map(struct ceph_mds_client *mdsc,
>> 			if (s->s_state == CEPH_MDS_SESSION_OPENING) {
>> 				/* the session never opened, just close it
>> 				 * out now */
>> -				__wake_requests(mdsc, &s->s_waiting);
>> +				get_session(s);
>> 				__unregister_session(mdsc, s);
>> +				__wake_requests(mdsc, &s->s_waiting);
>> +				ceph_put_mds_session(s);
> 
> What about this last bit? Why do we need to __wake_requests after
> __unregister_session here? If not for that change then you wouldn't
> need to take the extra reference here, AFAICS.

__wake_requests() calls __do_request(). I think __do_request() may choose the session we want to unregister.

> 
>> 			} else {
>> 				/* just close it */
>> 				mutex_unlock(&mdsc->mutex);
> 
> Reviewed-by: Jeff Layton <jlayton@redhat.com>



* Re: [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps()
  2017-04-05 14:16 ` [PATCH 1/5] ceph: fix wrong check in ceph_renew_caps() Jeff Layton
@ 2017-04-06  0:04   ` Yan, Zheng
  0 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2017-04-06  0:04 UTC (permalink / raw)
  To: Jeff Layton; +Cc: ceph-devel


> On 5 Apr 2017, at 22:16, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Wed, 2017-04-05 at 09:30 +0800, Yan, Zheng wrote:
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>> fs/ceph/file.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>> 
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 579a16c..0480492 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -123,7 +123,7 @@ int ceph_renew_caps(struct inode *inode)
>> 	spin_lock(&ci->i_ceph_lock);
>> 	wanted = __ceph_caps_file_wanted(ci);
>> 	if (__ceph_is_any_real_caps(ci) &&
>> -	    (!(wanted & CEPH_CAP_ANY_WR) == 0 || ci->i_auth_cap)) {
>> +	    (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
>> 		int issued = __ceph_caps_issued(ci, NULL);
>> 		spin_unlock(&ci->i_ceph_lock);
>> 		dout("renew caps %p want %s issued %s updating mds_wanted\n",
> 
> That certainly looks more like what was intended, but I'm still a
> little unclear on why we have so much special casing in all of this
> caps handling.
> 
> Why do we skip ceph_check_caps if we want CEPH_CAP_ANY_WR?

It’s for the multiple active mds setup. A client can request read caps from any mds (that replicates the inode), but it needs to request write caps from the auth mds.

Regards
Yan, Zheng 

> -- 
> Jeff Layton <jlayton@redhat.com>

