All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Elder <elder@inktank.com>
To: ceph-devel@vger.kernel.org
Subject: [PATCH 11/11] rbd: implement layered reads
Date: Thu, 11 Apr 2013 21:19:05 -0500	[thread overview]
Message-ID: <51676F19.8080408@inktank.com> (raw)
In-Reply-To: <51676E0F.2010504@inktank.com>

Implement layered read requests for format 2 rbd images.

If an rbd image is a clone of a snapshot, the snapshot will be the
clone's "parent" image.  When an object read request on a clone
comes back with ENOENT it indicates that the clone is not yet
populated with that portion of the image's data, and the parent
image should be consulted to satisfy the read.

When this occurs, a new image request is created, directed to the
parent image.  The offset and length of the new image request are
the same as the image-relative offset and length of the object
request that produced ENOENT.  Data from the parent image therefore
satisfies the object read request for the original image request.

While this code works, it will not be active until we enable the
layering feature (by adding RBD_FEATURE_LAYERING to the value of
RBD_FEATURES_SUPPORTED).

Signed-off-by: Alex Elder <elder@inktank.com>
---
 drivers/block/rbd.c |   97 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 85 insertions(+), 12 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5c129c5..13a381b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -398,6 +398,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
 #  define rbd_assert(expr)	((void) 0)
 #endif /* !RBD_DEBUG */

+static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
+
 static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver);
 static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);

@@ -1336,9 +1338,15 @@ static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)

 static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
 {
-	dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
-		obj_request->result, obj_request->xferred, obj_request->length);
-	if (obj_request->img_request)
+	struct rbd_img_request *img_request = obj_request->img_request;
+	bool layered = img_request && img_request_layered_test(img_request);
+
+	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
+		obj_request, img_request, obj_request->result,
+		obj_request->xferred, obj_request->length);
+	if (layered && obj_request->result == -ENOENT)
+		rbd_img_parent_read(obj_request);
+	else if (img_request)
 		rbd_img_obj_request_read_callback(obj_request);
 	else
 		obj_request_done_set(obj_request);
@@ -1349,9 +1357,8 @@ static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
 	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
 		obj_request->result, obj_request->length);
 	/*
-	 * There is no such thing as a successful short write.
-	 * Our xferred value is the number of bytes transferred
-	 * back.  Set it to our originally-requested length.
+	 * There is no such thing as a successful short write.  Set
+	 * it to our originally-requested length.
 	 */
 	obj_request->xferred = obj_request->length;
 	obj_request_done_set(obj_request);
@@ -1391,7 +1398,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 	 * passed to blk_end_request(), which takes an unsigned int.
 	 */
 	obj_request->xferred = osd_req->r_reply_op_len[0];
-	rbd_assert(obj_request->xferred < (u64) UINT_MAX);
+	rbd_assert(obj_request->xferred < (u64)UINT_MAX);
 	opcode = osd_req->r_ops[0].op;
 	switch (opcode) {
 	case CEPH_OSD_OP_READ:
@@ -1607,7 +1614,6 @@ static struct rbd_img_request *rbd_img_request_create(
 	INIT_LIST_HEAD(&img_request->obj_requests);
 	kref_init(&img_request->kref);

-	(void) img_request_layered_test(img_request);	/* Avoid a warning */
 	rbd_img_request_get(img_request);	/* Avoid a warning */
 	rbd_img_request_put(img_request);	/* TEMPORARY */

@@ -1635,6 +1641,9 @@ static void rbd_img_request_destroy(struct kref *kref)
 	if (img_request_write_test(img_request))
 		ceph_put_snap_context(img_request->snapc);

+	if (img_request_child_test(img_request))
+		rbd_obj_request_put(img_request->obj_request);
+
 	kfree(img_request);
 }

@@ -1643,13 +1652,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 	struct rbd_img_request *img_request;
 	unsigned int xferred;
 	int result;
+	bool more;

 	rbd_assert(obj_request_img_data_test(obj_request));
 	img_request = obj_request->img_request;

-	rbd_assert(!img_request_child_test(img_request));
-	rbd_assert(img_request->rq != NULL);
-
 	rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
 	xferred = (unsigned int)obj_request->xferred;
 	result = obj_request->result;
@@ -1666,7 +1673,15 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 			img_request->result = result;
 	}

-	return blk_end_request(img_request->rq, result, xferred);
+	if (img_request_child_test(img_request)) {
+		rbd_assert(img_request->obj_request != NULL);
+		more = obj_request->which < img_request->obj_request_count - 1;
+	} else {
+		rbd_assert(img_request->rq != NULL);
+		more = blk_end_request(img_request->rq, result, xferred);
+	}
+
+	return more;
 }

 static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
@@ -1811,6 +1826,64 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request)
 	return 0;
 }

+static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
+{
+	struct rbd_obj_request *obj_request;
+
+	rbd_assert(img_request_child_test(img_request));
+
+	obj_request = img_request->obj_request;
+	rbd_assert(obj_request != NULL);
+	obj_request->result = img_request->result;
+	obj_request->xferred = img_request->xferred;
+
+	rbd_img_obj_request_read_callback(obj_request);
+	rbd_obj_request_complete(obj_request);
+}
+
+static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
+{
+	struct rbd_device *rbd_dev;
+	struct rbd_img_request *img_request;
+	int result;
+
+	rbd_assert(obj_request_img_data_test(obj_request));
+	rbd_assert(obj_request->img_request != NULL);
+	rbd_assert(obj_request->result == (s32) -ENOENT);
+	rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+
+	rbd_dev = obj_request->img_request->rbd_dev;
+	rbd_assert(rbd_dev->parent != NULL);
+	/* rbd_read_finish(obj_request, obj_request->length); */
+	img_request = rbd_img_request_create(rbd_dev->parent,
+						obj_request->img_offset,
+						obj_request->length,
+						false, true);
+	result = -ENOMEM;
+	if (!img_request)
+		goto out_err;
+
+	rbd_obj_request_get(obj_request);
+	img_request->obj_request = obj_request;
+
+	result = rbd_img_request_fill_bio(img_request, obj_request->bio_list);
+	if (result)
+		goto out_err;
+
+	img_request->callback = rbd_img_parent_read_callback;
+	result = rbd_img_request_submit(img_request);
+	if (result)
+		goto out_err;
+
+	return;
+out_err:
+	if (img_request)
+		rbd_img_request_put(img_request);
+	obj_request->result = result;
+	obj_request->xferred = 0;
+	obj_request_done_set(obj_request);
+}
+
 static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
 				   u64 ver, u64 notify_id)
 {
-- 
1.7.9.5


  parent reply	other threads:[~2013-04-12  2:19 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-12  2:14 [PATCH 00/11] rbd: layered read functionality Alex Elder
2013-04-12  2:17 ` [PATCH 01/11] rbd: record overall image request result Alex Elder
2013-04-12  2:17 ` [PATCH 02/11] rbd: record aggregate image transfer count Alex Elder
2013-04-12  2:17 ` [PATCH 03/11] rbd: record image-relative offset in object requests Alex Elder
2013-04-12  2:17 ` [PATCH 04/11] rbd: define image request flags Alex Elder
2013-04-12  2:17 ` [PATCH 05/11] rbd: define image request originator flag Alex Elder
2013-04-12  2:18 ` [PATCH 06/11] rbd: define image request layered flag Alex Elder
2013-04-12  2:18 ` [PATCH 07/11] rbd: encapsulate image object end request handling Alex Elder
2013-04-12  2:18 ` [PATCH 08/11] rbd: define an rbd object request flags field Alex Elder
2013-04-12  2:18 ` [PATCH 09/11] rbd: add an object request flag for image data objects Alex Elder
2013-04-12  2:18 ` [PATCH 10/11] rbd: probe the parent of an image if present Alex Elder
2013-04-12  2:19 ` Alex Elder [this message]
2013-04-14 17:22 ` [PATCH 00/11] rbd: layered read functionality Josh Durgin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51676F19.8080408@inktank.com \
    --to=elder@inktank.com \
    --cc=ceph-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.