* [PATCH 0/3] rbd: make sure we have latest osdmap on 'rbd map'
From: Ilya Dryomov @ 2014-04-24 16:23 UTC
To: ceph-devel
Hello,
This is a rewrite of the fix for #8184 to use the MMonGetVersion message
instead of a kludgy timeout.
Thanks,
Ilya
Ilya Dryomov (3):
libceph: mon_get_version request infrastructure
libceph: add ceph_monc_wait_osdmap()
rbd: make sure we have latest osdmap on 'rbd map'
drivers/block/rbd.c | 27 ++++++--
include/linux/ceph/mon_client.h | 13 ++--
net/ceph/ceph_common.c | 2 +
net/ceph/debugfs.c | 2 +
net/ceph/mon_client.c | 146 +++++++++++++++++++++++++++++++++++++--
5 files changed, 175 insertions(+), 15 deletions(-)
--
1.7.10.4
* [PATCH 1/3] libceph: mon_get_version request infrastructure
From: Ilya Dryomov @ 2014-04-24 16:23 UTC
To: ceph-devel
Add support for mon_get_version requests to libceph. This reuses much
of the ceph_mon_generic_request infrastructure, with one exception:
the mon_get_version reply hdr->tid is always 0, which makes it
impossible to look up ceph_mon_generic_request contexts by tid in
get_generic_reply(). So, instead of preallocating reply messages as is
done for statfs and poolop requests, we allocate a reply message on the
reply path. This can probably interfere with revoke, but I don't see
a better way. (A caller-side usage sketch follows the patch.)
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
---
include/linux/ceph/mon_client.h | 11 ++--
net/ceph/ceph_common.c | 2 +
net/ceph/debugfs.c | 2 +
net/ceph/mon_client.c | 119 ++++++++++++++++++++++++++++++++++++---
4 files changed, 123 insertions(+), 11 deletions(-)
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index a486f390dfbe..d8060dd87f16 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -40,9 +40,9 @@ struct ceph_mon_request {
};
/*
- * ceph_mon_generic_request is being used for the statfs and poolop requests
- * which are bening done a bit differently because we need to get data back
- * to the caller
+ * ceph_mon_generic_request is being used for the statfs, poolop and
+ * mon_get_version requests which are being done a bit differently
+ * because we need to get data back to the caller
*/
struct ceph_mon_generic_request {
struct kref kref;
@@ -53,7 +53,7 @@ struct ceph_mon_generic_request {
int buf_len;
struct completion completion;
struct ceph_msg *request; /* original request */
- struct ceph_msg *reply; /* and reply */
+ struct ceph_msg *reply; /* and reply, NULL for mon_get_version */
};
struct ceph_mon_client {
@@ -108,6 +108,9 @@ extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
struct ceph_statfs *buf);
+extern int ceph_monc_do_get_version(struct ceph_mon_client *monc,
+ const char *what, u64 *newest);
+
extern int ceph_monc_open_session(struct ceph_mon_client *monc);
extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721d237e..1675021d8c12 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
case CEPH_MSG_STATFS: return "statfs";
case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+ case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
+ case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
case CEPH_MSG_MDS_MAP: return "mds_map";
case CEPH_MSG_CLIENT_SESSION: return "client_session";
case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 10421a4b76f8..45405aadee11 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -127,6 +127,8 @@ static int monc_show(struct seq_file *s, void *p)
op = le16_to_cpu(req->request->hdr.type);
if (op == CEPH_MSG_STATFS)
seq_printf(s, "%lld statfs\n", req->tid);
+ else if (op == CEPH_MSG_MON_GET_VERSION)
+ seq_printf(s, "%llu get_version", req->tid);
else
seq_printf(s, "%lld unknown\n", req->tid);
}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef35110b..d64023965d1c 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -477,15 +477,12 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
return m;
}
-static int do_generic_request(struct ceph_mon_client *monc,
- struct ceph_mon_generic_request *req)
+static int __do_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *req)
{
int err;
/* register request */
- mutex_lock(&monc->mutex);
- req->tid = ++monc->last_tid;
- req->request->hdr.tid = cpu_to_le64(req->tid);
__insert_generic_request(monc, req);
monc->num_generic_requests++;
ceph_con_send(&monc->con, ceph_msg_get(req->request));
@@ -496,13 +493,27 @@ static int do_generic_request(struct ceph_mon_client *monc,
mutex_lock(&monc->mutex);
rb_erase(&req->node, &monc->generic_request_tree);
monc->num_generic_requests--;
- mutex_unlock(&monc->mutex);
if (!err)
err = req->result;
return err;
}
+static int do_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *req)
+{
+ int err;
+
+ mutex_lock(&monc->mutex);
+ req->tid = ++monc->last_tid;
+ req->request->hdr.tid = cpu_to_le64(req->tid);
+
+ err = __do_generic_request(monc, req);
+
+ mutex_unlock(&monc->mutex);
+ return err;
+}
+
/*
* statfs
*/
@@ -579,6 +590,94 @@ out:
}
EXPORT_SYMBOL(ceph_monc_do_statfs);
+static void handle_get_version_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ void *p, *end;
+ u64 handle;
+ u64 version;
+
+ p = msg->front.iov_base;
+ end = p + msg->front_alloc_len;
+
+ ceph_decode_64_safe(&p, end, handle, bad);
+ ceph_decode_64_safe(&p, end, version, bad);
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, handle);
+ if (req) {
+ dout("%s handle %llu version %llu\n", __func__, handle,
+ version);
+ *(u64 *)req->buf = version;
+ req->result = 0;
+ get_generic_request(req);
+ } else {
+ pr_info("%s unknown handle %llu\n", __func__, handle);
+ }
+ mutex_unlock(&monc->mutex);
+ if (req) {
+ complete_all(&req->completion);
+ put_generic_request(req);
+ }
+
+ BUG_ON(req && req->reply);
+
+ return;
+bad:
+ pr_err("corrupt mon_get_version reply\n");
+ ceph_msg_dump(msg);
+}
+
+int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
+ u64 *newest)
+{
+ struct ceph_mon_generic_request *req;
+ void *p, *end;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = newest;
+ req->buf_len = sizeof(*newest);
+ init_completion(&req->completion);
+
+ req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
+ sizeof(u64) + sizeof(u32) + strlen(what),
+ GFP_NOFS, true);
+ if (!req->request) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Preallocating a reply ceph_msg for mon_get_version requests
+ * is pointless - their reply hdr->tid is always 0, which means
+ * that the ceph_mon_generic_request cannot be looked up in
+ * get_generic_reply().
+ */
+
+ p = req->request->front.iov_base;
+ end = p + req->request->front_alloc_len;
+
+ /* fill out request */
+ mutex_lock(&monc->mutex);
+ req->tid = ++monc->last_tid;
+ ceph_encode_64(&p, req->tid); /* handle */
+ ceph_encode_string(&p, end, what, strlen(what));
+
+ err = __do_generic_request(monc, req);
+
+ mutex_unlock(&monc->mutex);
+out:
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_get_version);
+
/*
* pool ops
*/
@@ -712,7 +811,8 @@ static void __resend_generic_request(struct ceph_mon_client *monc)
for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
req = rb_entry(p, struct ceph_mon_generic_request, node);
ceph_msg_revoke(req->request);
- ceph_msg_revoke_incoming(req->reply);
+ if (req->reply)
+ ceph_msg_revoke_incoming(req->reply);
ceph_con_send(&monc->con, ceph_msg_get(req->request));
}
}
@@ -981,6 +1081,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_statfs_reply(monc, msg);
break;
+ case CEPH_MSG_MON_GET_VERSION_REPLY:
+ handle_get_version_reply(monc, msg);
+ break;
+
case CEPH_MSG_POOLOP_REPLY:
handle_poolop_reply(monc, msg);
break;
@@ -1032,6 +1136,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_MON_MAP:
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
+ case CEPH_MSG_MON_GET_VERSION_REPLY:
m = ceph_msg_new(type, front_len, GFP_NOFS, false);
if (!m)
return NULL; /* ENOMEM--return skip == 0 */
--
1.7.10.4
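[ For illustration only: a minimal caller-side sketch of the interface added
above. The function name example_check_osdmap() and the pr_info() message
are hypothetical and not part of the patch; the sketch assumes an already
initialized struct ceph_client. ]

#include <linux/ceph/libceph.h>
#include <linux/ceph/mon_client.h>

/*
 * Hypothetical caller: ask the monitors for the newest osdmap epoch
 * ("osdmap" is the map name carried in the MMonGetVersion request) and
 * compare it against the epoch of the osdmap we currently hold.
 */
static int example_check_osdmap(struct ceph_client *client)
{
	u64 newest_epoch;
	int ret;

	ret = ceph_monc_do_get_version(&client->monc, "osdmap",
				       &newest_epoch);
	if (ret < 0)
		return ret;

	if (client->osdc.osdmap->epoch < newest_epoch)
		pr_info("have osdmap %u, newest is %llu\n",
			client->osdc.osdmap->epoch, newest_epoch);
	return 0;
}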
* [PATCH 2/3] libceph: add ceph_monc_wait_osdmap()
From: Ilya Dryomov @ 2014-04-24 16:23 UTC
To: ceph-devel
Add ceph_monc_wait_osdmap(), which will block until an osdmap with at
least the specified epoch is received or the timeout expires.
Export it together with ceph_monc_request_next_osdmap(), as both are
going to be needed by rbd. (A combined usage sketch follows the patch.)
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
---
include/linux/ceph/mon_client.h | 2 ++
net/ceph/mon_client.c | 27 +++++++++++++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index d8060dd87f16..99ea0d60042f 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -104,6 +104,8 @@ extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
+extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+ unsigned long timeout);
extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
struct ceph_statfs *buf);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index d64023965d1c..7aa1ab1dbbf6 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
__send_subscribe(monc);
mutex_unlock(&monc->mutex);
}
+EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+
+int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+ unsigned long timeout)
+{
+ unsigned long started = jiffies;
+ int ret;
+
+ mutex_lock(&monc->mutex);
+ while (monc->have_osdmap < epoch) {
+ mutex_unlock(&monc->mutex);
+
+ if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+ return -ETIMEDOUT;
+
+ ret = wait_event_interruptible_timeout(monc->client->auth_wq,
+ monc->have_osdmap >= epoch, timeout);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&monc->mutex);
+ }
+
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_monc_wait_osdmap);
/*
*
--
1.7.10.4
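[ For illustration only: a minimal sketch of how the two exported calls are
meant to be combined. The wrapper example_wait_for_epoch() is hypothetical;
patch 3 below does essentially this from rbd, passing
client->options->mount_timeout * HZ as the timeout. ]

#include <linux/ceph/libceph.h>
#include <linux/ceph/mon_client.h>

/*
 * Hypothetical wrapper: request the next osdmap from the monitors and
 * block until we have at least 'epoch', giving up after 'timeout'
 * jiffies.  Returns 0 on success, -ETIMEDOUT on timeout, or a negative
 * error from the interrupted wait.
 */
static int example_wait_for_epoch(struct ceph_client *client, u32 epoch,
				  unsigned long timeout)
{
	if (client->osdc.osdmap->epoch >= epoch)
		return 0;

	ceph_monc_request_next_osdmap(&client->monc);
	return ceph_monc_wait_osdmap(&client->monc, epoch, timeout);
}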
* [PATCH 3/3] rbd: make sure we have latest osdmap on 'rbd map'
From: Ilya Dryomov @ 2014-04-24 16:23 UTC
To: ceph-devel
Given an existing idle mapping (img1), mapping an image (img2) in
a newly created pool (pool2) fails:
$ ceph osd pool create pool1 8 8
$ rbd create --size 1000 pool1/img1
$ sudo rbd map pool1/img1
$ ceph osd pool create pool2 8 8
$ rbd create --size 1000 pool2/img2
$ sudo rbd map pool2/img2
rbd: sysfs write failed
rbd: map failed: (2) No such file or directory
This is because client instances are shared by default and we don't
request an osdmap update when bumping a ref on an existing client. The
fix is to use the mon_get_version request to see if the osdmap we have
is the latest, and block until the requested update is received if it's
not.
Fixes: http://tracker.ceph.com/issues/8184
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
---
drivers/block/rbd.c | 27 +++++++++++++++++++++++----
1 file changed, 23 insertions(+), 4 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 552a2edcaa74..a3734726eef9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -723,15 +723,34 @@ static int parse_rbd_opts_token(char *c, void *private)
static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
{
struct rbd_client *rbdc;
+ u64 newest_epoch;
mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
rbdc = rbd_client_find(ceph_opts);
- if (rbdc) /* using an existing client */
- ceph_destroy_options(ceph_opts);
- else
+ if (!rbdc) {
rbdc = rbd_client_create(ceph_opts);
- mutex_unlock(&client_mutex);
+ mutex_unlock(&client_mutex);
+ return rbdc;
+ }
+
+ /*
+ * Using an existing client, make sure we've got the latest
+ * osdmap. Ignore the errors though, as failing to get it
+ * doesn't necessarily prevent from working.
+ */
+ if (ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
+ &newest_epoch) < 0)
+ goto out;
+
+ if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
+ ceph_monc_request_next_osdmap(&rbdc->client->monc);
+ (void) ceph_monc_wait_osdmap(&rbdc->client->monc, newest_epoch,
+ rbdc->client->options->mount_timeout * HZ);
+ }
+out:
+ mutex_unlock(&client_mutex);
+ ceph_destroy_options(ceph_opts);
return rbdc;
}
--
1.7.10.4
* [PATCH v2] rbd: make sure we have latest osdmap on 'rbd map'
From: Ilya Dryomov @ 2014-04-30 12:28 UTC
To: ceph-devel
Given an existing idle mapping (img1), mapping an image (img2) in
a newly created pool (pool2) fails:
$ ceph osd pool create pool1 8 8
$ rbd create --size 1000 pool1/img1
$ sudo rbd map pool1/img1
$ ceph osd pool create pool2 8 8
$ rbd create --size 1000 pool2/img2
$ sudo rbd map pool2/img2
rbd: sysfs write failed
rbd: map failed: (2) No such file or directory
This is because client instances are shared by default and we don't
request an osdmap update when bumping a ref on an existing client. The
fix is to use the mon_get_version request to see if the osdmap we have
is the latest, and block until the requested update is received if it's
not.
Fixes: http://tracker.ceph.com/issues/8184
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
---
v2:
- send a mon_get_version request and wait for a reply only if we were
unable to locate the pool (i.e. don't hurt the common case)
drivers/block/rbd.c | 36 +++++++++++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 552a2edcaa74..daf7b4659b4a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4683,6 +4683,38 @@ out_err:
}
/*
+ * Return pool id (>= 0) or a negative error code.
+ */
+static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
+{
+ u64 newest_epoch;
+ unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
+ int tries = 0;
+ int ret;
+
+again:
+ ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
+ if (ret == -ENOENT && tries++ < 1) {
+ ret = ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
+ &newest_epoch);
+ if (ret < 0)
+ return ret;
+
+ if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
+ ceph_monc_request_next_osdmap(&rbdc->client->monc);
+ (void) ceph_monc_wait_osdmap(&rbdc->client->monc,
+ newest_epoch, timeout);
+ goto again;
+ } else {
+ /* the osdmap we have is new enough */
+ return -ENOENT;
+ }
+ }
+
+ return ret;
+}
+
+/*
* An rbd format 2 image has a unique identifier, distinct from the
* name given to it by the user. Internally, that identifier is
* what's used to specify the names of objects related to the image.
@@ -5053,7 +5085,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
struct rbd_options *rbd_opts = NULL;
struct rbd_spec *spec = NULL;
struct rbd_client *rbdc;
- struct ceph_osd_client *osdc;
bool read_only;
int rc = -ENOMEM;
@@ -5075,8 +5106,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
}
/* pick the pool */
- osdc = &rbdc->client->osdc;
- rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
+ rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
if (rc < 0)
goto err_out_client;
spec->pool_id = (u64)rc;
--
1.7.10.4
* Re: [PATCH 3/3] rbd: make sure we have latest osdmap on 'rbd map'
From: Sage Weil @ 2014-05-07 16:03 UTC
To: Ilya Dryomov; +Cc: ceph-devel
On Thu, 24 Apr 2014, Ilya Dryomov wrote:
> Given an existing idle mapping (img1), mapping an image (img2) in
> a newly created pool (pool2) fails:
>
> $ ceph osd pool create pool1 8 8
> $ rbd create --size 1000 pool1/img1
> $ sudo rbd map pool1/img1
> $ ceph osd pool create pool2 8 8
> $ rbd create --size 1000 pool2/img2
> $ sudo rbd map pool2/img2
> rbd: sysfs write failed
> rbd: map failed: (2) No such file or directory
>
> This is because client instances are shared by default and we don't
> request an osdmap update when bumping a ref on an existing client. The
> fix is to use the mon_get_version request to see if the osdmap we have
> is the latest, and block until the requested update is received if it's
> not.
This is slightly more heavyweight than the userspace client's approach.
There, we only check for a newer osdmap if we find that the pool doesn't
exist. That shouldn't be too difficult to mirror here... probably just
expose a wait_for_latest_map() function, and call that + retry from the
rbd map code?
sage
>
> Fixes: http://tracker.ceph.com/issues/8184
>
> Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
> ---
> drivers/block/rbd.c | 27 +++++++++++++++++++++++----
> 1 file changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 552a2edcaa74..a3734726eef9 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -723,15 +723,34 @@ static int parse_rbd_opts_token(char *c, void *private)
> static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
> {
> struct rbd_client *rbdc;
> + u64 newest_epoch;
>
> mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
> rbdc = rbd_client_find(ceph_opts);
> - if (rbdc) /* using an existing client */
> - ceph_destroy_options(ceph_opts);
> - else
> + if (!rbdc) {
> rbdc = rbd_client_create(ceph_opts);
> - mutex_unlock(&client_mutex);
> + mutex_unlock(&client_mutex);
> + return rbdc;
> + }
> +
> + /*
> + * Using an existing client, make sure we've got the latest
> + * osdmap. Ignore the errors though, as failing to get it
> + * doesn't necessarily prevent from working.
> + */
> + if (ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
> + &newest_epoch) < 0)
> + goto out;
> +
> + if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
> + ceph_monc_request_next_osdmap(&rbdc->client->monc);
> + (void) ceph_monc_wait_osdmap(&rbdc->client->monc, newest_epoch,
> + rbdc->client->options->mount_timeout * HZ);
> + }
>
> +out:
> + mutex_unlock(&client_mutex);
> + ceph_destroy_options(ceph_opts);
> return rbdc;
> }
>
> --
> 1.7.10.4
>
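[ For illustration: a sketch of the helper shape being suggested here, built
from the primitives in patches 1 and 2. The name wait_for_latest_map() is
taken from the suggestion above; the body is hypothetical -- the v2 patch
earlier in the thread realizes the same idea as rbd_add_get_pool_id(). ]

#include <linux/ceph/libceph.h>
#include <linux/ceph/mon_client.h>

/*
 * Hypothetical helper: make sure our osdmap is at least as new as the
 * newest one the monitors know about, waiting up to 'timeout' jiffies.
 */
static int wait_for_latest_map(struct ceph_client *client,
			       unsigned long timeout)
{
	u64 newest_epoch;
	int ret;

	ret = ceph_monc_do_get_version(&client->monc, "osdmap",
				       &newest_epoch);
	if (ret < 0)
		return ret;

	if (client->osdc.osdmap->epoch >= newest_epoch)
		return 0;

	ceph_monc_request_next_osdmap(&client->monc);
	return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout);
}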
* Re: [PATCH 3/3] rbd: make sure we have latest osdmap on 'rbd map'
From: Sage Weil @ 2014-05-07 16:04 UTC
To: Ilya Dryomov; +Cc: ceph-devel
On Wed, 7 May 2014, Sage Weil wrote:
> On Thu, 24 Apr 2014, Ilya Dryomov wrote:
> > Given an existing idle mapping (img1), mapping an image (img2) in
> > a newly created pool (pool2) fails:
> >
> > $ ceph osd pool create pool1 8 8
> > $ rbd create --size 1000 pool1/img1
> > $ sudo rbd map pool1/img1
> > $ ceph osd pool create pool2 8 8
> > $ rbd create --size 1000 pool2/img2
> > $ sudo rbd map pool2/img2
> > rbd: sysfs write failed
> > rbd: map failed: (2) No such file or directory
> >
> > This is because client instances are shared by default and we don't
> > request an osdmap update when bumping a ref on an existing client. The
> > fix is to use the mon_get_version request to see if the osdmap we have
> > is the latest, and block until the requested update is received if it's
> > not.
>
> This is slightly more heavyweight than the userspace client's approach.
> There, we only check for a newer osdmap if we find that the pool doesn't
> exist. That shouldn't be too difficult to mirror here... probably just
> expose a wait_for_latest_map() function, and call that + retry from the
> rbd map code?
(Der, and I see now that your 2/3 patch already exposes that method. :)
sage
>
> sage
>
> >
> > Fixes: http://tracker.ceph.com/issues/8184
> >
> > Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
> > ---
> > drivers/block/rbd.c | 27 +++++++++++++++++++++++----
> > 1 file changed, 23 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> > index 552a2edcaa74..a3734726eef9 100644
> > --- a/drivers/block/rbd.c
> > +++ b/drivers/block/rbd.c
> > @@ -723,15 +723,34 @@ static int parse_rbd_opts_token(char *c, void *private)
> > static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
> > {
> > struct rbd_client *rbdc;
> > + u64 newest_epoch;
> >
> > mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
> > rbdc = rbd_client_find(ceph_opts);
> > - if (rbdc) /* using an existing client */
> > - ceph_destroy_options(ceph_opts);
> > - else
> > + if (!rbdc) {
> > rbdc = rbd_client_create(ceph_opts);
> > - mutex_unlock(&client_mutex);
> > + mutex_unlock(&client_mutex);
> > + return rbdc;
> > + }
> > +
> > + /*
> > + * Using an existing client, make sure we've got the latest
> > + * osdmap. Ignore the errors though, as failing to get it
> > + * doesn't necessarily prevent from working.
> > + */
> > + if (ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
> > + &newest_epoch) < 0)
> > + goto out;
> > +
> > + if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
> > + ceph_monc_request_next_osdmap(&rbdc->client->monc);
> > + (void) ceph_monc_wait_osdmap(&rbdc->client->monc, newest_epoch,
> > + rbdc->client->options->mount_timeout * HZ);
> > + }
> >
> > +out:
> > + mutex_unlock(&client_mutex);
> > + ceph_destroy_options(ceph_opts);
> > return rbdc;
> > }
> >
> > --
> > 1.7.10.4
> >