* [PATCH 1/6] rbd: return errors for mapped but deleted snapshot
2012-07-19 12:30 [PATCH 0/6] rbd: old patches from Josh Alex Elder
@ 2012-07-19 12:34 ` Alex Elder
2012-07-19 12:34 ` [PATCH 2/6] rbd: only reset capacity when pointing to head Alex Elder
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Alex Elder @ 2012-07-19 12:34 UTC (permalink / raw)
To: ceph-devel
When a snapshot is deleted, the OSD will return ENOENT when reading
from it. This is normally interpreted as a hole by rbd, which will
return zeroes. To minimize the time in which this can happen, stop
requests early when we are notified that our snapshot no longer
exists.
[elder@inktank.com: updated __rbd_init_snaps_header() logic]
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 32 ++++++++++++++++++++++++++++++--
1 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b124442..730d0ce 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -172,9 +172,13 @@ struct rbd_device {
/* protects updating the header */
struct rw_semaphore header_rwsem;
+ /* name of the snapshot this device reads from */
char *snap_name;
+ /* id of the snapshot this device reads from */
u64 snap_id; /* current snapshot id */
- int read_only;
+ /* whether the snap_id this device reads from still exists */
+ bool snap_exists;
+ int read_only;
struct list_head node;
@@ -597,6 +601,7 @@ static int rbd_header_set_snap(struct rbd_device
*rbd_dev, u64 *size)
else
snapc->seq = 0;
rbd_dev->snap_id = CEPH_NOSNAP;
+ rbd_dev->snap_exists = false;
rbd_dev->read_only = 0;
if (size)
*size = header->image_size;
@@ -606,6 +611,7 @@ static int rbd_header_set_snap(struct rbd_device
*rbd_dev, u64 *size)
if (ret < 0)
goto done;
rbd_dev->snap_id = snapc->seq;
+ rbd_dev->snap_exists = true;
rbd_dev->read_only = 1;
}
@@ -1468,6 +1474,21 @@ static void rbd_rq_fn(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
+ if (rbd_dev->snap_id != CEPH_NOSNAP) {
+ bool snap_exists;
+
+ down_read(&rbd_dev->header_rwsem);
+ snap_exists = rbd_dev->snap_exists;
+ up_read(&rbd_dev->header_rwsem);
+
+ if (!snap_exists) {
+ dout("request for non-existent snapshot");
+ spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(rq, -ENXIO);
+ continue;
+ }
+ }
+
dout("%s 0x%x bytes at 0x%llx\n",
do_write ? "write" : "read",
size, blk_rq_pos(rq) * SECTOR_SIZE);
@@ -2088,7 +2109,14 @@ static int __rbd_init_snaps_header(struct
rbd_device *rbd_dev)
cur_id = rbd_dev->header.snapc->snaps[i - 1];
if (!i || old_snap->id < cur_id) {
- /* old_snap->id was skipped, thus was removed */
+ /*
+ * old_snap->id was skipped, thus was
+ * removed. If this rbd_dev is mapped to
+ * the removed snapshot, record that it no
+ * longer exists, to prevent further I/O.
+ */
+ if (rbd_dev->snap_id == old_snap->id)
+ rbd_dev->snap_exists = false;
__rbd_remove_snap_dev(rbd_dev, old_snap);
continue;
}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/6] rbd: only reset capacity when pointing to head
2012-07-19 12:30 [PATCH 0/6] rbd: old patches from Josh Alex Elder
2012-07-19 12:34 ` [PATCH 1/6] rbd: return errors for mapped but deleted snapshot Alex Elder
@ 2012-07-19 12:34 ` Alex Elder
2012-07-19 12:34 ` [PATCH 3/6] rbd: expose the correct size of the device in sysfs Alex Elder
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Alex Elder @ 2012-07-19 12:34 UTC (permalink / raw)
To: ceph-devel
Snapshots cannot be resized, and the new capacity of head should not
be reflected by the snapshot.
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 7 ++++++-
1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 730d0ce..f171ceb 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1725,7 +1725,12 @@ static int __rbd_refresh_header(struct rbd_device
*rbd_dev)
return ret;
/* resized? */
- set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);
+ if (rbd_dev->snap_id == CEPH_NOSNAP) {
+ sector_t size = (sector_t) h.image_size / SECTOR_SIZE;
+
+ dout("setting size to %llu sectors", (unsigned long long) size);
+ set_capacity(rbd_dev->disk, size);
+ }
down_write(&rbd_dev->header_rwsem);
--
1.7.5.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/6] rbd: expose the correct size of the device in sysfs
2012-07-19 12:30 [PATCH 0/6] rbd: old patches from Josh Alex Elder
2012-07-19 12:34 ` [PATCH 1/6] rbd: return errors for mapped but deleted snapshot Alex Elder
2012-07-19 12:34 ` [PATCH 2/6] rbd: only reset capacity when pointing to head Alex Elder
@ 2012-07-19 12:34 ` Alex Elder
2012-07-19 12:34 ` [PATCH 4/6] rbd: set image size when header is updated Alex Elder
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Alex Elder @ 2012-07-19 12:34 UTC (permalink / raw)
To: ceph-devel
If an image was mapped to a snapshot, the size of the head version
would be shown. Protect capacity with header_rwsem, since it may
change.
Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index f171ceb..9c3a1db 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1724,6 +1724,8 @@ static int __rbd_refresh_header(struct rbd_device
*rbd_dev)
if (ret < 0)
return ret;
+ down_write(&rbd_dev->header_rwsem);
+
/* resized? */
if (rbd_dev->snap_id == CEPH_NOSNAP) {
sector_t size = (sector_t) h.image_size / SECTOR_SIZE;
@@ -1732,8 +1734,6 @@ static int __rbd_refresh_header(struct rbd_device
*rbd_dev)
set_capacity(rbd_dev->disk, size);
}
- down_write(&rbd_dev->header_rwsem);
-
snap_seq = rbd_dev->header.snapc->seq;
if (rbd_dev->header.total_snaps &&
rbd_dev->header.snapc->snaps[0] == snap_seq)
@@ -1853,8 +1853,13 @@ static ssize_t rbd_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ sector_t size;
+
+ down_read(&rbd_dev->header_rwsem);
+ size = get_capacity(rbd_dev->disk);
+ up_read(&rbd_dev->header_rwsem);
- return sprintf(buf, "%llu\n", (unsigned long
long)rbd_dev->header.image_size);
+ return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
}
static ssize_t rbd_major_show(struct device *dev,
--
1.7.5.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/6] rbd: set image size when header is updated
2012-07-19 12:30 [PATCH 0/6] rbd: old patches from Josh Alex Elder
` (2 preceding siblings ...)
2012-07-19 12:34 ` [PATCH 3/6] rbd: expose the correct size of the device in sysfs Alex Elder
@ 2012-07-19 12:34 ` Alex Elder
2012-07-19 12:35 ` [PATCH 5/6] rbd: use reference counting for the snap context Alex Elder
2012-07-19 12:35 ` [PATCH 6/6] rbd: send header version when notifying Alex Elder
5 siblings, 0 replies; 7+ messages in thread
From: Alex Elder @ 2012-07-19 12:34 UTC (permalink / raw)
To: ceph-devel
The image may have been resized.
Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 9c3a1db..a6bbda2 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1746,6 +1746,7 @@ static int __rbd_refresh_header(struct rbd_device
*rbd_dev)
kfree(rbd_dev->header.snap_names);
kfree(rbd_dev->header.snapc);
+ rbd_dev->header.image_size = h.image_size;
rbd_dev->header.total_snaps = h.total_snaps;
rbd_dev->header.snapc = h.snapc;
rbd_dev->header.snap_names = h.snap_names;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/6] rbd: use reference counting for the snap context
2012-07-19 12:30 [PATCH 0/6] rbd: old patches from Josh Alex Elder
` (3 preceding siblings ...)
2012-07-19 12:34 ` [PATCH 4/6] rbd: set image size when header is updated Alex Elder
@ 2012-07-19 12:35 ` Alex Elder
2012-07-19 12:35 ` [PATCH 6/6] rbd: send header version when notifying Alex Elder
5 siblings, 0 replies; 7+ messages in thread
From: Alex Elder @ 2012-07-19 12:35 UTC (permalink / raw)
To: ceph-devel
This prevents a race between requests with a given snap context and
header updates that free it. The osd client was already expecting the
snap context to be reference counted, since it get()s it in
ceph_osdc_build_request and put()s it when the request completes.
Also remove the second down_read()/up_read() on header_rwsem in
rbd_do_request, which wasn't actually preventing this race or
protecting any other data.
Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 36 ++++++++++++++++++------------------
1 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index a6bbda2..988f944 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -626,7 +626,7 @@ static void rbd_header_free(struct rbd_image_header
*header)
kfree(header->object_prefix);
kfree(header->snap_sizes);
kfree(header->snap_names);
- kfree(header->snapc);
+ ceph_put_snap_context(header->snapc);
}
/*
@@ -902,13 +902,10 @@ static int rbd_do_request(struct request *rq,
dout("rbd_do_request object_name=%s ofs=%lld len=%lld\n",
object_name, len, ofs);
- down_read(&rbd_dev->header_rwsem);
-
osdc = &rbd_dev->rbd_client->client->osdc;
req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
false, GFP_NOIO, pages, bio);
if (!req) {
- up_read(&rbd_dev->header_rwsem);
ret = -ENOMEM;
goto done_pages;
}
@@ -942,7 +939,6 @@ static int rbd_do_request(struct request *rq,
snapc,
&mtime,
req->r_oid, req->r_oid_len);
- up_read(&rbd_dev->header_rwsem);
if (linger_req) {
ceph_osdc_set_request_linger(osdc, req);
@@ -1448,6 +1444,7 @@ static void rbd_rq_fn(struct request_queue *q)
u64 ofs;
int num_segs, cur_seg = 0;
struct rbd_req_coll *coll;
+ struct ceph_snap_context *snapc;
/* peek at request from block layer */
if (!rq)
@@ -1474,21 +1471,20 @@ static void rbd_rq_fn(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
- if (rbd_dev->snap_id != CEPH_NOSNAP) {
- bool snap_exists;
+ down_read(&rbd_dev->header_rwsem);
- down_read(&rbd_dev->header_rwsem);
- snap_exists = rbd_dev->snap_exists;
+ if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) {
up_read(&rbd_dev->header_rwsem);
-
- if (!snap_exists) {
- dout("request for non-existent snapshot");
- spin_lock_irq(q->queue_lock);
- __blk_end_request_all(rq, -ENXIO);
- continue;
- }
+ dout("request for non-existent snapshot");
+ spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(rq, -ENXIO);
+ continue;
}
+ snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+
+ up_read(&rbd_dev->header_rwsem);
+
dout("%s 0x%x bytes at 0x%llx\n",
do_write ? "write" : "read",
size, blk_rq_pos(rq) * SECTOR_SIZE);
@@ -1498,6 +1494,7 @@ static void rbd_rq_fn(struct request_queue *q)
if (!coll) {
spin_lock_irq(q->queue_lock);
__blk_end_request_all(rq, -ENOMEM);
+ ceph_put_snap_context(snapc);
continue;
}
@@ -1521,7 +1518,7 @@ static void rbd_rq_fn(struct request_queue *q)
/* init OSD command: write or read */
if (do_write)
rbd_req_write(rq, rbd_dev,
- rbd_dev->header.snapc,
+ snapc,
ofs,
op_size, bio,
coll, cur_seg);
@@ -1544,6 +1541,8 @@ next_seg:
if (bp)
bio_pair_release(bp);
spin_lock_irq(q->queue_lock);
+
+ ceph_put_snap_context(snapc);
}
}
@@ -1744,7 +1743,8 @@ static int __rbd_refresh_header(struct rbd_device
*rbd_dev)
/* rbd_dev->header.object_prefix shouldn't change */
kfree(rbd_dev->header.snap_sizes);
kfree(rbd_dev->header.snap_names);
- kfree(rbd_dev->header.snapc);
+ /* osd requests may still refer to snapc */
+ ceph_put_snap_context(rbd_dev->header.snapc);
rbd_dev->header.image_size = h.image_size;
rbd_dev->header.total_snaps = h.total_snaps;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 6/6] rbd: send header version when notifying
2012-07-19 12:30 [PATCH 0/6] rbd: old patches from Josh Alex Elder
` (4 preceding siblings ...)
2012-07-19 12:35 ` [PATCH 5/6] rbd: use reference counting for the snap context Alex Elder
@ 2012-07-19 12:35 ` Alex Elder
5 siblings, 0 replies; 7+ messages in thread
From: Alex Elder @ 2012-07-19 12:35 UTC (permalink / raw)
To: ceph-devel
Previously the original header version was sent. Now, we update it
when the header changes.
Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 988f944..4d3a1e0 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1197,7 +1197,7 @@ static int rbd_req_sync_notify_ack(struct
rbd_device *rbd_dev,
if (ret < 0)
return ret;
- ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version);
+ ops[0].watch.ver = cpu_to_le64(ver);
ops[0].watch.cookie = notify_id;
ops[0].watch.flag = 0;
@@ -1216,6 +1216,7 @@ static int rbd_req_sync_notify_ack(struct
rbd_device *rbd_dev,
static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ u64 hver;
int rc;
if (!rbd_dev)
@@ -1225,12 +1226,13 @@ static void rbd_watch_cb(u64 ver, u64 notify_id,
u8 opcode, void *data)
rbd_dev->header_name, notify_id, (int) opcode);
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
rc = __rbd_refresh_header(rbd_dev);
+ hver = rbd_dev->header.obj_version;
mutex_unlock(&ctl_mutex);
if (rc)
pr_warning(RBD_DRV_NAME "%d got notification but failed to "
" update snaps: %d\n", rbd_dev->major, rc);
- rbd_req_sync_notify_ack(rbd_dev, ver, notify_id, rbd_dev->header_name);
+ rbd_req_sync_notify_ack(rbd_dev, hver, notify_id, rbd_dev->header_name);
}
/*
@@ -1746,6 +1748,7 @@ static int __rbd_refresh_header(struct rbd_device
*rbd_dev)
/* osd requests may still refer to snapc */
ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.obj_version = h.obj_version;
rbd_dev->header.image_size = h.image_size;
rbd_dev->header.total_snaps = h.total_snaps;
rbd_dev->header.snapc = h.snapc;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 7+ messages in thread