All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] rbd: layering (mostly) error path fixes
@ 2016-09-19 17:03 Ilya Dryomov
  2016-09-19 17:03 ` [PATCH 1/8] rbd: change rbd_obj_request_submit() signature Ilya Dryomov
                   ` (8 more replies)
  0 siblings, 9 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

Hello,

This is what came out of looking at David's rbd_img_obj_exists_submit()
patches.  Alex, I'd appreciate it if you could take look, at patch 4 in
particular.

Thanks,

                Ilya


Ilya Dryomov (8):
  rbd: change rbd_obj_request_submit() signature
  rbd: clean up asserts in rbd_img_obj_request_submit() helpers
  rbd: mark the original request as done if stat request fails
  rbd: move bumping img_request refcount into rbd_obj_request_submit()
  rbd: don't crash or leak on errors in
    rbd_img_obj_parent_read_full_callback()
  rbd: rework rbd_img_obj_exists_submit() error paths
  rbd: don't call rbd_osd_req_format_read() for !img_data requests
  rbd: img_data requests don't own their page array

 drivers/block/rbd.c | 199 ++++++++++++++++++++++------------------------------
 1 file changed, 82 insertions(+), 117 deletions(-)

-- 
2.4.3


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/8] rbd: change rbd_obj_request_submit() signature
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-23 15:38   ` Alex Elder
  2016-09-26 12:38   ` David Disseldorp
  2016-09-19 17:03 ` [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers Ilya Dryomov
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

- osdc parameter is useless
- starting with commit 5aea3dcd5021 ("libceph: a major OSD client
  update"), ceph_osdc_start_request() always returns success

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 70 ++++++++++++++++++-----------------------------------
 1 file changed, 23 insertions(+), 47 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 82569e65d61b..d8b702e3c4d9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1610,11 +1610,12 @@ static bool obj_request_type_valid(enum obj_request_type type)
 	}
 }
 
-static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
-				struct rbd_obj_request *obj_request)
+static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
 {
-	dout("%s %p\n", __func__, obj_request);
-	return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
+	struct ceph_osd_request *osd_req = obj_request->osd_req;
+
+	dout("%s %p osd_req %p\n", __func__, obj_request, osd_req);
+	ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
 }
 
 static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
@@ -2638,7 +2639,6 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 {
 	struct rbd_obj_request *orig_request;
 	struct ceph_osd_request *osd_req;
-	struct ceph_osd_client *osdc;
 	struct rbd_device *rbd_dev;
 	struct page **pages;
 	enum obj_operation_type op_type;
@@ -2675,13 +2675,9 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	 * and re-submit the original write request.
 	 */
 	if (!rbd_dev->parent_overlap) {
-		struct ceph_osd_client *osdc;
-
 		ceph_release_page_vector(pages, page_count);
-		osdc = &rbd_dev->rbd_client->client->osdc;
-		img_result = rbd_obj_request_submit(osdc, orig_request);
-		if (!img_result)
-			return;
+		rbd_obj_request_submit(orig_request);
+		return;
 	}
 
 	if (img_result)
@@ -2715,10 +2711,9 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 
 	/* All set, send it off. */
 
-	osdc = &rbd_dev->rbd_client->client->osdc;
-	img_result = rbd_obj_request_submit(osdc, orig_request);
-	if (!img_result)
-		return;
+	rbd_obj_request_submit(orig_request);
+	return;
+
 out_err:
 	/* Record the error code and complete the request */
 
@@ -2852,17 +2847,13 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
 
 	/*
 	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to free the pages
-	 * and re-submit the original write request.
+	 * image has been flattened) we need to re-submit the
+	 * original request.
 	 */
 	rbd_dev = orig_request->img_request->rbd_dev;
 	if (!rbd_dev->parent_overlap) {
-		struct ceph_osd_client *osdc;
-
-		osdc = &rbd_dev->rbd_client->client->osdc;
-		result = rbd_obj_request_submit(osdc, orig_request);
-		if (!result)
-			return;
+		rbd_obj_request_submit(orig_request);
+		return;
 	}
 
 	/*
@@ -2894,7 +2885,6 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 {
 	struct rbd_obj_request *stat_request;
 	struct rbd_device *rbd_dev;
-	struct ceph_osd_client *osdc;
 	struct page **pages = NULL;
 	u32 page_count;
 	size_t size;
@@ -2938,8 +2928,9 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 					false, false);
 	rbd_osd_req_format_read(stat_request);
 
-	osdc = &rbd_dev->rbd_client->client->osdc;
-	ret = rbd_obj_request_submit(osdc, stat_request);
+	rbd_obj_request_submit(stat_request);
+	return 0;
+
 out:
 	if (ret)
 		rbd_obj_request_put(obj_request);
@@ -2996,13 +2987,8 @@ static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
 static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
 {
 	if (img_obj_request_simple(obj_request)) {
-		struct rbd_device *rbd_dev;
-		struct ceph_osd_client *osdc;
-
-		rbd_dev = obj_request->img_request->rbd_dev;
-		osdc = &rbd_dev->rbd_client->client->osdc;
-
-		return rbd_obj_request_submit(osdc, obj_request);
+		rbd_obj_request_submit(obj_request);
+		return 0;
 	}
 
 	/*
@@ -3065,12 +3051,8 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
 	rbd_assert(obj_request->img_request);
 	rbd_dev = obj_request->img_request->rbd_dev;
 	if (!rbd_dev->parent_overlap) {
-		struct ceph_osd_client *osdc;
-
-		osdc = &rbd_dev->rbd_client->client->osdc;
-		img_result = rbd_obj_request_submit(osdc, obj_request);
-		if (!img_result)
-			return;
+		rbd_obj_request_submit(obj_request);
+		return;
 	}
 
 	obj_request->result = img_result;
@@ -3997,7 +3979,6 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 			     void *inbound,
 			     size_t inbound_size)
 {
-	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
 	struct page **pages;
 	u32 page_count;
@@ -4048,9 +4029,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 					0, false, false);
 	rbd_osd_req_format_read(obj_request);
 
-	ret = rbd_obj_request_submit(osdc, obj_request);
-	if (ret)
-		goto out;
+	rbd_obj_request_submit(obj_request);
 	ret = rbd_obj_request_wait(obj_request);
 	if (ret)
 		goto out;
@@ -4255,7 +4234,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 				u64 offset, u64 length, void *buf)
 
 {
-	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
 	struct page **pages = NULL;
 	u32 page_count;
@@ -4290,9 +4268,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 					false, false);
 	rbd_osd_req_format_read(obj_request);
 
-	ret = rbd_obj_request_submit(osdc, obj_request);
-	if (ret)
-		goto out;
+	rbd_obj_request_submit(obj_request);
 	ret = rbd_obj_request_wait(obj_request);
 	if (ret)
 		goto out;
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
  2016-09-19 17:03 ` [PATCH 1/8] rbd: change rbd_obj_request_submit() signature Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-23 15:57   ` Alex Elder
  2016-09-26 12:43   ` David Disseldorp
  2016-09-19 17:03 ` [PATCH 3/8] rbd: mark the original request as done if stat request fails Ilya Dryomov
                   ` (6 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

Assert once in rbd_img_obj_request_submit().

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index d8b702e3c4d9..027e0817a118 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2739,21 +2739,14 @@ out_err:
  */
 static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
 {
-	struct rbd_img_request *img_request = NULL;
+	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
 	struct rbd_img_request *parent_request = NULL;
-	struct rbd_device *rbd_dev;
 	u64 img_offset;
 	u64 length;
 	struct page **pages = NULL;
 	u32 page_count;
 	int result;
 
-	rbd_assert(obj_request_img_data_test(obj_request));
-	rbd_assert(obj_request_type_valid(obj_request->type));
-
-	img_request = obj_request->img_request;
-	rbd_assert(img_request != NULL);
-	rbd_dev = img_request->rbd_dev;
 	rbd_assert(rbd_dev->parent != NULL);
 
 	/*
@@ -2794,10 +2787,11 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
 	result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
 	if (result)
 		goto out_err;
+
 	parent_request->copyup_pages = pages;
 	parent_request->copyup_page_count = page_count;
-
 	parent_request->callback = rbd_img_obj_parent_read_full_callback;
+
 	result = rbd_img_request_submit(parent_request);
 	if (!result)
 		return 0;
@@ -2883,8 +2877,8 @@ out:
 
 static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 {
+	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
 	struct rbd_obj_request *stat_request;
-	struct rbd_device *rbd_dev;
 	struct page **pages = NULL;
 	u32 page_count;
 	size_t size;
@@ -2915,8 +2909,6 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 	stat_request->pages = pages;
 	stat_request->page_count = page_count;
 
-	rbd_assert(obj_request->img_request);
-	rbd_dev = obj_request->img_request->rbd_dev;
 	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
 						   stat_request);
 	if (!stat_request->osd_req)
@@ -2940,14 +2932,8 @@ out:
 
 static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
 {
-	struct rbd_img_request *img_request;
-	struct rbd_device *rbd_dev;
-
-	rbd_assert(obj_request_img_data_test(obj_request));
-
-	img_request = obj_request->img_request;
-	rbd_assert(img_request);
-	rbd_dev = img_request->rbd_dev;
+	struct rbd_img_request *img_request = obj_request->img_request;
+	struct rbd_device *rbd_dev = img_request->rbd_dev;
 
 	/* Reads */
 	if (!img_request_write_test(img_request) &&
@@ -2986,6 +2972,10 @@ static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
 
 static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
 {
+	rbd_assert(obj_request_img_data_test(obj_request));
+	rbd_assert(obj_request_type_valid(obj_request->type));
+	rbd_assert(obj_request->img_request);
+
 	if (img_obj_request_simple(obj_request)) {
 		rbd_obj_request_submit(obj_request);
 		return 0;
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 3/8] rbd: mark the original request as done if stat request fails
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
  2016-09-19 17:03 ` [PATCH 1/8] rbd: change rbd_obj_request_submit() signature Ilya Dryomov
  2016-09-19 17:03 ` [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-23 21:39   ` Alex Elder
  2016-09-19 17:03 ` [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit() Ilya Dryomov
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

If stat request fails with something other than -ENOENT (which just
means that we need to copyup), the original object request is never
marked as done and therefore never completed.  Fix this by moving the
mark done + complete snippet from rbd_img_obj_parent_read_full() into
rbd_img_obj_exists_callback().  The former remains covered, as the
latter is its only caller (through rbd_img_obj_request_submit()).

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 027e0817a118..b247200a0f28 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2805,10 +2805,6 @@ out_err:
 		ceph_release_page_vector(pages, page_count);
 	if (parent_request)
 		rbd_img_request_put(parent_request);
-	obj_request->result = result;
-	obj_request->xferred = 0;
-	obj_request_done_set(obj_request);
-
 	return result;
 }
 
@@ -2860,19 +2856,25 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
 		obj_request_existence_set(orig_request, true);
 	} else if (result == -ENOENT) {
 		obj_request_existence_set(orig_request, false);
-	} else if (result) {
-		orig_request->result = result;
-		goto out;
+	} else {
+		goto fail_orig_request;
 	}
 
 	/*
 	 * Resubmit the original request now that we have recorded
 	 * whether the target object exists.
 	 */
-	orig_request->result = rbd_img_obj_request_submit(orig_request);
-out:
-	if (orig_request->result)
-		rbd_obj_request_complete(orig_request);
+	result = rbd_img_obj_request_submit(orig_request);
+	if (result)
+		goto fail_orig_request;
+
+	return;
+
+fail_orig_request:
+	orig_request->result = result;
+	orig_request->xferred = 0;
+	obj_request_done_set(orig_request);
+	rbd_obj_request_complete(orig_request);
 }
 
 static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit()
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
                   ` (2 preceding siblings ...)
  2016-09-19 17:03 ` [PATCH 3/8] rbd: mark the original request as done if stat request fails Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-25 15:56   ` Alex Elder
  2016-09-19 17:03 ` [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback() Ilya Dryomov
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

Commit 0f2d5be792b0 ("rbd: use reference counts for image requests")
added rbd_img_request_get(), which rbd_img_request_fill() calls for
each obj_request added to img_request.  It was an urgent band-aid for
the uglyness that is rbd_img_obj_callback() and none of the error paths
were updated.

Given that this img_request reference is meant to represent an
obj_request that hasn't passed through rbd_img_obj_callback() yet,
proper cleanup in appropriate destructors is a challenge.  However,
noting that if we don't get a chance to call rbd_obj_request_complete(),
there is not going to be a call to rbd_img_obj_callback(), we can move
rbd_img_request_get() into rbd_obj_request_submit() and fixup the two
places that call rbd_obj_request_complete() directly and not through
rbd_obj_request_submit() to temporarily bump img_request, so that
rbd_img_obj_callback() can put as usual.

This takes care of img_request leaks on errors on the submit side.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b247200a0f28..d8070bd29fd1 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1610,11 +1610,17 @@ static bool obj_request_type_valid(enum obj_request_type type)
 	}
 }
 
+static void rbd_img_obj_callback(struct rbd_obj_request *obj_request);
+
 static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
 {
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
 	dout("%s %p osd_req %p\n", __func__, obj_request, osd_req);
+	if (obj_request_img_data_test(obj_request)) {
+		WARN_ON(obj_request->callback != rbd_img_obj_callback);
+		rbd_img_request_get(obj_request->img_request);
+	}
 	ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
 }
 
@@ -2580,8 +2586,6 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 
 		rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0);
 
-		rbd_img_request_get(img_request);
-
 		img_offset += length;
 		resid -= length;
 	}
@@ -2715,10 +2719,9 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	return;
 
 out_err:
-	/* Record the error code and complete the request */
-
 	orig_request->result = img_result;
 	orig_request->xferred = 0;
+	rbd_img_request_get(orig_request->img_request);
 	obj_request_done_set(orig_request);
 	rbd_obj_request_complete(orig_request);
 }
@@ -2873,6 +2876,7 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
 fail_orig_request:
 	orig_request->result = result;
 	orig_request->xferred = 0;
+	rbd_img_request_get(orig_request->img_request);
 	obj_request_done_set(orig_request);
 	rbd_obj_request_complete(orig_request);
 }
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback()
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
                   ` (3 preceding siblings ...)
  2016-09-19 17:03 ` [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit() Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-25 16:02   ` Alex Elder
  2016-09-26 12:58   ` David Disseldorp
  2016-09-19 17:03 ` [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths Ilya Dryomov
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

- fix parent_length == img_request->xferred assert to not fire on
  copyup read failures
- don't leak pages if copyup read fails or we can't allocate a new osd
  request

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index d8070bd29fd1..b9a4e79a663f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2666,7 +2666,7 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	rbd_assert(obj_request_type_valid(orig_request->type));
 	img_result = img_request->result;
 	parent_length = img_request->length;
-	rbd_assert(parent_length == img_request->xferred);
+	rbd_assert(img_result || parent_length == img_request->xferred);
 	rbd_img_request_put(img_request);
 
 	rbd_assert(orig_request->img_request);
@@ -2719,6 +2719,7 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	return;
 
 out_err:
+	ceph_release_page_vector(pages, page_count);
 	orig_request->result = img_result;
 	orig_request->xferred = 0;
 	rbd_img_request_get(orig_request->img_request);
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
                   ` (4 preceding siblings ...)
  2016-09-19 17:03 ` [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback() Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-25 16:30   ` Alex Elder
  2016-09-26 12:05   ` David Disseldorp
  2016-09-19 17:03 ` [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests Ilya Dryomov
                   ` (2 subsequent siblings)
  8 siblings, 2 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

- don't put obj_request if rbd_obj_request_create() fails
- don't leak stat_request if rbd_osd_req_create() fails

Reported-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b9a4e79a663f..03f171067e08 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2886,11 +2886,23 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 {
 	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
 	struct rbd_obj_request *stat_request;
-	struct page **pages = NULL;
+	struct page **pages;
 	u32 page_count;
 	size_t size;
 	int ret;
 
+	stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
+					      OBJ_REQUEST_PAGES);
+	if (!stat_request)
+		return -ENOMEM;
+
+	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
+						   stat_request);
+	if (!stat_request->osd_req) {
+		ret = -ENOMEM;
+		goto fail_stat_request;
+	}
+
 	/*
 	 * The response data for a STAT call consists of:
 	 *     le64 length;
@@ -2902,38 +2914,28 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 	size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
 	page_count = (u32)calc_pages_for(0, size);
 	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
+	if (IS_ERR(pages)) {
+		ret = PTR_ERR(pages);
+		goto fail_stat_request;
+	}
 
-	ret = -ENOMEM;
-	stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
-							OBJ_REQUEST_PAGES);
-	if (!stat_request)
-		goto out;
+	osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
+	osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
+				     false, false);
 
 	rbd_obj_request_get(obj_request);
 	stat_request->obj_request = obj_request;
 	stat_request->pages = pages;
 	stat_request->page_count = page_count;
-
-	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
-						   stat_request);
-	if (!stat_request->osd_req)
-		goto out;
 	stat_request->callback = rbd_img_obj_exists_callback;
 
-	osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
-	osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
-					false, false);
 	rbd_osd_req_format_read(stat_request);
 
 	rbd_obj_request_submit(stat_request);
 	return 0;
 
-out:
-	if (ret)
-		rbd_obj_request_put(obj_request);
-
+fail_stat_request:
+	rbd_obj_request_put(stat_request);
 	return ret;
 }
 
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
                   ` (5 preceding siblings ...)
  2016-09-19 17:03 ` [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-25 16:44   ` Alex Elder
  2016-09-26 12:05   ` David Disseldorp
  2016-09-19 17:03 ` [PATCH 8/8] rbd: img_data requests don't own their page array Ilya Dryomov
  2016-09-19 17:06 ` [PATCH 0/8] rbd: layering (mostly) error path fixes Alex Elder
  8 siblings, 2 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

Accessing obj_request->img_request union field is only valid for object
requests associated with an image (i.e. if obj_request_img_data_test()
returns true).  rbd_osd_req_format_read() used to do more, but now it
just sets osd_req->snap_id.  Standalone and stat object requests always
go to the HEAD revision and are fine with CEPH_NOSNAP set by libceph,
so get around the invalid union field use by simply not calling
rbd_osd_req_format_read() in those places.

Reported-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 03f171067e08..8907ee6342ac 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1943,11 +1943,10 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
 
 static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
 {
-	struct rbd_img_request *img_request = obj_request->img_request;
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
-	if (img_request)
-		osd_req->r_snapid = img_request->snap_id;
+	rbd_assert(obj_request_img_data_test(obj_request));
+	osd_req->r_snapid = obj_request->img_request->snap_id;
 }
 
 static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
@@ -2929,8 +2928,6 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 	stat_request->page_count = page_count;
 	stat_request->callback = rbd_img_obj_exists_callback;
 
-	rbd_osd_req_format_read(stat_request);
-
 	rbd_obj_request_submit(stat_request);
 	return 0;
 
@@ -4026,7 +4023,6 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 	osd_req_op_cls_response_data_pages(obj_request->osd_req, 0,
 					obj_request->pages, inbound_size,
 					0, false, false);
-	rbd_osd_req_format_read(obj_request);
 
 	rbd_obj_request_submit(obj_request);
 	ret = rbd_obj_request_wait(obj_request);
@@ -4265,7 +4261,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 					obj_request->length,
 					obj_request->offset & ~PAGE_MASK,
 					false, false);
-	rbd_osd_req_format_read(obj_request);
 
 	rbd_obj_request_submit(obj_request);
 	ret = rbd_obj_request_wait(obj_request);
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 8/8] rbd: img_data requests don't own their page array
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
                   ` (6 preceding siblings ...)
  2016-09-19 17:03 ` [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests Ilya Dryomov
@ 2016-09-19 17:03 ` Ilya Dryomov
  2016-09-25 17:06   ` Alex Elder
  2016-09-19 17:06 ` [PATCH 0/8] rbd: layering (mostly) error path fixes Alex Elder
  8 siblings, 1 reply; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-19 17:03 UTC (permalink / raw)
  To: ceph-devel; +Cc: Alex Elder

Move the check into rbd_obj_request_destroy(), to avoid use-after-free
on errors in rbd_img_request_fill().

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8907ee6342ac..d305b9ebe2cf 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2139,7 +2139,9 @@ static void rbd_obj_request_destroy(struct kref *kref)
 			bio_chain_put(obj_request->bio_list);
 		break;
 	case OBJ_REQUEST_PAGES:
-		if (obj_request->pages)
+		/* img_data requests don't own their page array */
+		if (obj_request->pages &&
+		    !obj_request_img_data_test(obj_request))
 			ceph_release_page_vector(obj_request->pages,
 						obj_request->page_count);
 		break;
@@ -2360,13 +2362,6 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 		xferred = obj_request->length;
 	}
 
-	/* Image object requests don't own their page array */
-
-	if (obj_request->type == OBJ_REQUEST_PAGES) {
-		obj_request->pages = NULL;
-		obj_request->page_count = 0;
-	}
-
 	if (img_request_child_test(img_request)) {
 		rbd_assert(img_request->obj_request != NULL);
 		more = obj_request->which < img_request->obj_request_count - 1;
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 0/8] rbd: layering (mostly) error path fixes
  2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
                   ` (7 preceding siblings ...)
  2016-09-19 17:03 ` [PATCH 8/8] rbd: img_data requests don't own their page array Ilya Dryomov
@ 2016-09-19 17:06 ` Alex Elder
  8 siblings, 0 replies; 33+ messages in thread
From: Alex Elder @ 2016-09-19 17:06 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel; +Cc: Alex Elder

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> This is what came out of looking at David's rbd_img_obj_exists_submit()
> patches.  Alex, I'd appreciate it if you could take look, at patch 4 in
> particular.

I will.  You probably should have copied David on your posting...

					-Alex

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/8] rbd: change rbd_obj_request_submit() signature
  2016-09-19 17:03 ` [PATCH 1/8] rbd: change rbd_obj_request_submit() signature Ilya Dryomov
@ 2016-09-23 15:38   ` Alex Elder
  2016-09-26 12:38   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: Alex Elder @ 2016-09-23 15:38 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> - osdc parameter is useless
> - starting with commit 5aea3dcd5021 ("libceph: a major OSD client
>   update"), ceph_osdc_start_request() always returns success

Then ceph_osdc_start_request() should probably be made a
void function as well.  I know it's called all over the
place.

But this is a nice cleanup.

Reviewed-by: Alex Elder <elder@linaro.org>


> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
>  drivers/block/rbd.c | 70 ++++++++++++++++++-----------------------------------
>  1 file changed, 23 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 82569e65d61b..d8b702e3c4d9 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -1610,11 +1610,12 @@ static bool obj_request_type_valid(enum obj_request_type type)
>  	}
>  }
>  
> -static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
> -				struct rbd_obj_request *obj_request)
> +static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
>  {
> -	dout("%s %p\n", __func__, obj_request);
> -	return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
> +	struct ceph_osd_request *osd_req = obj_request->osd_req;
> +
> +	dout("%s %p osd_req %p\n", __func__, obj_request, osd_req);
> +	ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
>  }
>  
>  static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
> @@ -2638,7 +2639,6 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
>  {
>  	struct rbd_obj_request *orig_request;
>  	struct ceph_osd_request *osd_req;
> -	struct ceph_osd_client *osdc;
>  	struct rbd_device *rbd_dev;
>  	struct page **pages;
>  	enum obj_operation_type op_type;
> @@ -2675,13 +2675,9 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
>  	 * and re-submit the original write request.
>  	 */
>  	if (!rbd_dev->parent_overlap) {
> -		struct ceph_osd_client *osdc;
> -
>  		ceph_release_page_vector(pages, page_count);
> -		osdc = &rbd_dev->rbd_client->client->osdc;
> -		img_result = rbd_obj_request_submit(osdc, orig_request);
> -		if (!img_result)
> -			return;
> +		rbd_obj_request_submit(orig_request);
> +		return;
>  	}
>  
>  	if (img_result)
> @@ -2715,10 +2711,9 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
>  
>  	/* All set, send it off. */
>  
> -	osdc = &rbd_dev->rbd_client->client->osdc;
> -	img_result = rbd_obj_request_submit(osdc, orig_request);
> -	if (!img_result)
> -		return;
> +	rbd_obj_request_submit(orig_request);
> +	return;
> +
>  out_err:
>  	/* Record the error code and complete the request */
>  
> @@ -2852,17 +2847,13 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
>  
>  	/*
>  	 * If the overlap has become 0 (most likely because the
> -	 * image has been flattened) we need to free the pages
> -	 * and re-submit the original write request.
> +	 * image has been flattened) we need to re-submit the
> +	 * original request.
>  	 */
>  	rbd_dev = orig_request->img_request->rbd_dev;
>  	if (!rbd_dev->parent_overlap) {
> -		struct ceph_osd_client *osdc;
> -
> -		osdc = &rbd_dev->rbd_client->client->osdc;
> -		result = rbd_obj_request_submit(osdc, orig_request);
> -		if (!result)
> -			return;
> +		rbd_obj_request_submit(orig_request);
> +		return;
>  	}
>  
>  	/*
> @@ -2894,7 +2885,6 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
>  {
>  	struct rbd_obj_request *stat_request;
>  	struct rbd_device *rbd_dev;
> -	struct ceph_osd_client *osdc;
>  	struct page **pages = NULL;
>  	u32 page_count;
>  	size_t size;
> @@ -2938,8 +2928,9 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
>  					false, false);
>  	rbd_osd_req_format_read(stat_request);
>  
> -	osdc = &rbd_dev->rbd_client->client->osdc;
> -	ret = rbd_obj_request_submit(osdc, stat_request);
> +	rbd_obj_request_submit(stat_request);
> +	return 0;
> +
>  out:
>  	if (ret)
>  		rbd_obj_request_put(obj_request);
> @@ -2996,13 +2987,8 @@ static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
>  static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
>  {
>  	if (img_obj_request_simple(obj_request)) {
> -		struct rbd_device *rbd_dev;
> -		struct ceph_osd_client *osdc;
> -
> -		rbd_dev = obj_request->img_request->rbd_dev;
> -		osdc = &rbd_dev->rbd_client->client->osdc;
> -
> -		return rbd_obj_request_submit(osdc, obj_request);
> +		rbd_obj_request_submit(obj_request);
> +		return 0;
>  	}
>  
>  	/*
> @@ -3065,12 +3051,8 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
>  	rbd_assert(obj_request->img_request);
>  	rbd_dev = obj_request->img_request->rbd_dev;
>  	if (!rbd_dev->parent_overlap) {
> -		struct ceph_osd_client *osdc;
> -
> -		osdc = &rbd_dev->rbd_client->client->osdc;
> -		img_result = rbd_obj_request_submit(osdc, obj_request);
> -		if (!img_result)
> -			return;
> +		rbd_obj_request_submit(obj_request);
> +		return;
>  	}
>  
>  	obj_request->result = img_result;
> @@ -3997,7 +3979,6 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
>  			     void *inbound,
>  			     size_t inbound_size)
>  {
> -	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
>  	struct rbd_obj_request *obj_request;
>  	struct page **pages;
>  	u32 page_count;
> @@ -4048,9 +4029,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
>  					0, false, false);
>  	rbd_osd_req_format_read(obj_request);
>  
> -	ret = rbd_obj_request_submit(osdc, obj_request);
> -	if (ret)
> -		goto out;
> +	rbd_obj_request_submit(obj_request);
>  	ret = rbd_obj_request_wait(obj_request);
>  	if (ret)
>  		goto out;
> @@ -4255,7 +4234,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
>  				u64 offset, u64 length, void *buf)
>  
>  {
> -	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
>  	struct rbd_obj_request *obj_request;
>  	struct page **pages = NULL;
>  	u32 page_count;
> @@ -4290,9 +4268,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
>  					false, false);
>  	rbd_osd_req_format_read(obj_request);
>  
> -	ret = rbd_obj_request_submit(osdc, obj_request);
> -	if (ret)
> -		goto out;
> +	rbd_obj_request_submit(obj_request);
>  	ret = rbd_obj_request_wait(obj_request);
>  	if (ret)
>  		goto out;
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers
  2016-09-19 17:03 ` [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers Ilya Dryomov
@ 2016-09-23 15:57   ` Alex Elder
  2016-09-23 16:07     ` Ilya Dryomov
  2016-09-26 12:43   ` David Disseldorp
  1 sibling, 1 reply; 33+ messages in thread
From: Alex Elder @ 2016-09-23 15:57 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel; +Cc: Alex Elder

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> Assert once in rbd_img_obj_request_submit().

Generally I prefer validating things as early as possible.

But I also value the simplicity of doing it all in one place.
These are assertions, so the conditions checked should never
really happen anyway.  And...  now that we're well past doing
huge and rapid changes, the code is showing its maturity, so
the number of assertions is kind of excessive anyway.

Reviewed-by: Alex Elder <elder@linaro.org>

> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
>  drivers/block/rbd.c | 30 ++++++++++--------------------
>  1 file changed, 10 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index d8b702e3c4d9..027e0817a118 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -2739,21 +2739,14 @@ out_err:
>   */
>  static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
>  {
> -	struct rbd_img_request *img_request = NULL;
> +	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
>  	struct rbd_img_request *parent_request = NULL;
> -	struct rbd_device *rbd_dev;
>  	u64 img_offset;
>  	u64 length;
>  	struct page **pages = NULL;
>  	u32 page_count;
>  	int result;
>  
> -	rbd_assert(obj_request_img_data_test(obj_request));
> -	rbd_assert(obj_request_type_valid(obj_request->type));
> -
> -	img_request = obj_request->img_request;
> -	rbd_assert(img_request != NULL);
> -	rbd_dev = img_request->rbd_dev;
>  	rbd_assert(rbd_dev->parent != NULL);
>  
>  	/*
> @@ -2794,10 +2787,11 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
>  	result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
>  	if (result)
>  		goto out_err;
> +
>  	parent_request->copyup_pages = pages;
>  	parent_request->copyup_page_count = page_count;
> -
>  	parent_request->callback = rbd_img_obj_parent_read_full_callback;
> +
>  	result = rbd_img_request_submit(parent_request);
>  	if (!result)
>  		return 0;
> @@ -2883,8 +2877,8 @@ out:
>  
>  static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
>  {
> +	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
>  	struct rbd_obj_request *stat_request;
> -	struct rbd_device *rbd_dev;
>  	struct page **pages = NULL;
>  	u32 page_count;
>  	size_t size;
> @@ -2915,8 +2909,6 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
>  	stat_request->pages = pages;
>  	stat_request->page_count = page_count;
>  
> -	rbd_assert(obj_request->img_request);
> -	rbd_dev = obj_request->img_request->rbd_dev;
>  	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
>  						   stat_request);
>  	if (!stat_request->osd_req)
> @@ -2940,14 +2932,8 @@ out:
>  
>  static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
>  {
> -	struct rbd_img_request *img_request;
> -	struct rbd_device *rbd_dev;
> -
> -	rbd_assert(obj_request_img_data_test(obj_request));
> -
> -	img_request = obj_request->img_request;
> -	rbd_assert(img_request);
> -	rbd_dev = img_request->rbd_dev;
> +	struct rbd_img_request *img_request = obj_request->img_request;
> +	struct rbd_device *rbd_dev = img_request->rbd_dev;
>  
>  	/* Reads */
>  	if (!img_request_write_test(img_request) &&
> @@ -2986,6 +2972,10 @@ static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
>  
>  static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
>  {
> +	rbd_assert(obj_request_img_data_test(obj_request));
> +	rbd_assert(obj_request_type_valid(obj_request->type));
> +	rbd_assert(obj_request->img_request);
> +
>  	if (img_obj_request_simple(obj_request)) {
>  		rbd_obj_request_submit(obj_request);
>  		return 0;
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers
  2016-09-23 15:57   ` Alex Elder
@ 2016-09-23 16:07     ` Ilya Dryomov
  2016-09-23 16:41       ` Alex Elder
  0 siblings, 1 reply; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-23 16:07 UTC (permalink / raw)
  To: Alex Elder; +Cc: Ceph Development, Alex Elder

On Fri, Sep 23, 2016 at 5:57 PM, Alex Elder <elder@ieee.org> wrote:
> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>> Assert once in rbd_img_obj_request_submit().
>
> Generally I prefer validating things as early as possible.
>
> But I also value the simplicity of doing it all in one place.
> These are assertions, so the conditions checked should never
> really happen anyway.  And...  now that we're well past doing
> huge and rapid changes, the code is showing its maturity, so
> the number of assertions is kind of excessive anyway.

For the record, with this patch we now assert earlier than we were ;)
Also, it doesn't weaken any of the asserts - just moves them to central
location.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers
  2016-09-23 16:07     ` Ilya Dryomov
@ 2016-09-23 16:41       ` Alex Elder
  0 siblings, 0 replies; 33+ messages in thread
From: Alex Elder @ 2016-09-23 16:41 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Ceph Development, Alex Elder

On 09/23/2016 11:07 AM, Ilya Dryomov wrote:
>> > Generally I prefer validating things as early as possible.
>> >
>> > But I also value the simplicity of doing it all in one place.
>> > These are assertions, so the conditions checked should never
>> > really happen anyway.  And...  now that we're well past doing
>> > huge and rapid changes, the code is showing its maturity, so
>> > the number of assertions is kind of excessive anyway.
> For the record, with this patch we now assert earlier than we were ;)
> Also, it doesn't weaken any of the asserts - just moves them to central
> location.

Yes, you're right, it's even better than I thought.	-Alex


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/8] rbd: mark the original request as done if stat request fails
  2016-09-19 17:03 ` [PATCH 3/8] rbd: mark the original request as done if stat request fails Ilya Dryomov
@ 2016-09-23 21:39   ` Alex Elder
  2016-09-26 12:28     ` Ilya Dryomov
  0 siblings, 1 reply; 33+ messages in thread
From: Alex Elder @ 2016-09-23 21:39 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> If stat request fails with something other than -ENOENT (which just
> means that we need to copyup), the original object request is never
> marked as done and therefore never completed.  Fix this by moving the
> mark done + complete snippet from rbd_img_obj_parent_read_full() into
> rbd_img_obj_exists_callback().  The former remains covered, as the
> latter is its only caller (through rbd_img_obj_request_submit()).
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Did you ever see this happen?

This little block of code (setting an error and marking a
request done) is found in two other spots in the code; maybe
it could be encapsulated.  (That wouldn't have helped in
this case, however.)

It took me a while to refresh on all the moving parts here.
Let me explain what I see.

No matter what, when an object request gets submitted, it
must eventually have its result code set, get marked done,
and be completed by a call to rbd_obj_request_complete().

An image object gets submitted by rbd_img_obj_request_submit().
Initially, rbd_img_request_submit() calls this for every object
request in the image request.  If any errors occur here, the
image request is dropped, and in the process of cleaning up
the image request, its reference to its objects are dropped as
well.  (Here it's possible some requests are never submitted,
and therefore never marked done...  But I think that's OK.)

There is one other place rbd_img_obj_request_submit() gets
called.  Before getting to that, let's look at what it does.

Simple image object requests (reads, or non-layered writes,
or layered writes that are known to not overlap the parent)
require no special treatment.  The object request is submitted,
and it will eventually be marked done and be completed.

For a layered write request, we need to know whether an image
object exists, and if we don't know we submit a stat call for
that object so we can find out.  When that call completes,
rbd_img_obj_exists_callback() records the result (whether the
object exists), and then re-submits the original image object
request.  This is the second spot rbd_img_obj_request_submit()
is called.  If an error occurs at this stage, we need to mark
the original request done and complete it.
--> This is the location of the bug this patch fixes--the
    original request was not getting marked done in the event
    of an error.

The last case is a call to rbd_img_obj_request_submit() for
a non-simple request in which we know the target image object
doesn't exist.  So in that case we issue a read of the data
in the parent object backing the full (target) image object,
in rbd_img_obj_parent_read_full().  This is necessary so that
data can be supplied to the target OSD in a copyup request.
Previously, if an error occurred calling that function, the
original image object request would be marked done and completed.
Otherwise, that parent image would, when complete, cause
rbd_img_obj_parent_read_full_callback() to be called.  This
patch changes that (discussed below).

If an error occurs in rbd_img_obj_parent_read_full_callback(),
that function marks the original image object request done
and completes it.  Otherwise the original request is converted
into a copyup request, and that gets submitted to the original
image object.  Eventually rbd_osd_copyup_callback() is called,
which marks the original request done, and as a result,
rbd_osd_req_callback() completes that request.

This covers all the cases.

Now about the change...

In in rbd_img_obj_parent_read_full() you have eliminated
setting the object request result, marking it done, and
completing it.  Now that function will simply return an
error if one occurs.  The only caller of that function is
rbd_img_obj_request_submit(), and as laid out above, we
know an error occurring there leads to the original image
object request being marked done and completed.

Or rather, it's done properly provided the fix for the
bug in rbd_img_obj_exists_callback() is in place.

So I guess I'd say this looks good, and in summary:

Reviewed-by: Alex Elder <elder@linaro.org>

I have one minor suggestion below.

> ---
>  drivers/block/rbd.c | 24 +++++++++++++-----------
>  1 file changed, 13 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 027e0817a118..b247200a0f28 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -2805,10 +2805,6 @@ out_err:
>  		ceph_release_page_vector(pages, page_count);
>  	if (parent_request)
>  		rbd_img_request_put(parent_request);
> -	obj_request->result = result;
> -	obj_request->xferred = 0;
> -	obj_request_done_set(obj_request);
> -

You should change the comment at the top of this function so
it indicates that it is no longer this function that takes
care of passing the error on to the original image object
request.

>  	return result;
>  }
>  
> @@ -2860,19 +2856,25 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
>  		obj_request_existence_set(orig_request, true);
>  	} else if (result == -ENOENT) {
>  		obj_request_existence_set(orig_request, false);
> -	} else if (result) {
> -		orig_request->result = result;
> -		goto out;
> +	} else {
> +		goto fail_orig_request;
>  	}
>  
>  	/*
>  	 * Resubmit the original request now that we have recorded
>  	 * whether the target object exists.
>  	 */
> -	orig_request->result = rbd_img_obj_request_submit(orig_request);
> -out:
> -	if (orig_request->result)
> -		rbd_obj_request_complete(orig_request);
> +	result = rbd_img_obj_request_submit(orig_request);
> +	if (result)
> +		goto fail_orig_request;
> +
> +	return;
> +
> +fail_orig_request:
> +	orig_request->result = result;
> +	orig_request->xferred = 0;
> +	obj_request_done_set(orig_request);
> +	rbd_obj_request_complete(orig_request);
>  }
>  
>  static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit()
  2016-09-19 17:03 ` [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit() Ilya Dryomov
@ 2016-09-25 15:56   ` Alex Elder
  2016-09-26 14:38     ` Ilya Dryomov
  0 siblings, 1 reply; 33+ messages in thread
From: Alex Elder @ 2016-09-25 15:56 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> Commit 0f2d5be792b0 ("rbd: use reference counts for image requests")
> added rbd_img_request_get(), which rbd_img_request_fill() calls for
> each obj_request added to img_request.  It was an urgent band-aid for
> the uglyness that is rbd_img_obj_callback() and none of the error paths
> were updated.

I'm not sure how to improve that function.  Object requests in
an image request need to be completed in sequential order,
because blk_update_request() requires that.  (Or maybe you
were referring to something else you find ugly.)

> Given that this img_request reference is meant to represent an
> obj_request that hasn't passed through rbd_img_obj_callback() yet,

It was, for the purpose of that commit.  But I don't like to think
of reference counting that way.  I usually try to reason about
reference counts as actual counts of pointers referring to objects.
And in that sense, I think it might have been better in the first
place to drop the an object request's reference to the image request
in rbd_img_obj_request_del(), where the img_request pointer gets
nulled out.  And move the call to rbd_img_request_get() for an
object request into rbd_img_obj_request_add(), where the pointer
gets assigned.  I don't know why I didn't do it that way before;
like you said it was sort of an urgently developed change.

I haven't investigated this alternative exhaustively, but if
it works I think it would be a cleaner fix.

> proper cleanup in appropriate destructors is a challenge.  However,
> noting that if we don't get a chance to call rbd_obj_request_complete(),
> there is not going to be a call to rbd_img_obj_callback(), we can move
> rbd_img_request_get() into rbd_obj_request_submit() and fixup the two
> places that call rbd_obj_request_complete() directly and not through
> rbd_obj_request_submit() to temporarily bump img_request, so that
> rbd_img_obj_callback() can put as usual.
> 
> This takes care of img_request leaks on errors on the submit side.

So *this* is the problem you're aiming to solve here...  It would
have been nice for you to call attention to where these leaks were
occurring.  (It seems rbd_img_obj_parent_read_full_callback()
and rbd_img_obj_exists_callback().)

It looks to me like your change is sound, but again, I think it
would be better to make the reference counting match the actual
recording of references to objects as I mentioned before.  I'm
going to ask for your opinion on that before I offer you my
"reviewed by" on this.

					-Alex

And now, some more gratuitous analysis...

We currently (without this patch) get a reference to an image request
for every object request populated in rbd_img_request_fill(), right
after calling rbd_img_obj_request_fill().  We want each of these to
have a matching reference drop, once that object request is completed,
and the object request no longer has any use for what's in the image
request.  Right now that occurs in rbd_img_obj_callback().  But that
function is only called if an image object request gets successfully
submitted.  And in particular, if an error occurs while processing
a layered write--either while doing an existence check for an object
or when doing a read of the parent data--the original image object
requests never get submitted, and therefore their references to the
image don't get dropped properly.  This issue is the subject of your
patch.  All other image object requests appear to get submitted,
and therefore properly release their reference.

There are two other image request references.  A reference taken
before submitting all the object requests in rbd_img_request_submit();
that reference is dropped in that same function after all object
requests are submitted.

The last reference is the reference that is set when the image
request is created.  That reference should be dropped when we are
done with the image request.  If an image request has no callback
function, that reference is dropped in rbd_img_request_complete().
If there is a callback, that callback function must eventually
lead to the image request reference being dropped.  Only two
image request callback functions are ever assigned:
  rbd_img_parent_read_callback()
  rbd_img_parent_read_full_callback()
Both of those functions grab information needed from the image,
then drop the "original" image request reference.

I might have missed something but I think with your fix, or with
the alternative I propose, we'll have all image request references
accounted for.

> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
>  drivers/block/rbd.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index b247200a0f28..d8070bd29fd1 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -1610,11 +1610,17 @@ static bool obj_request_type_valid(enum obj_request_type type)
>  	}
>  }
>  
> +static void rbd_img_obj_callback(struct rbd_obj_request *obj_request);
> +
>  static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
>  {
>  	struct ceph_osd_request *osd_req = obj_request->osd_req;
>  
>  	dout("%s %p osd_req %p\n", __func__, obj_request, osd_req);
> +	if (obj_request_img_data_test(obj_request)) {
> +		WARN_ON(obj_request->callback != rbd_img_obj_callback);
> +		rbd_img_request_get(obj_request->img_request);
> +	}
>  	ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
>  }
>  
> @@ -2580,8 +2586,6 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
>  
>  		rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0);
>  
> -		rbd_img_request_get(img_request);
> -
>  		img_offset += length;
>  		resid -= length;
>  	}
> @@ -2715,10 +2719,9 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
>  	return;
>  
>  out_err:
> -	/* Record the error code and complete the request */
> -
>  	orig_request->result = img_result;
>  	orig_request->xferred = 0;
> +	rbd_img_request_get(orig_request->img_request);
>  	obj_request_done_set(orig_request);
>  	rbd_obj_request_complete(orig_request);
>  }
> @@ -2873,6 +2876,7 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
>  fail_orig_request:
>  	orig_request->result = result;
>  	orig_request->xferred = 0;
> +	rbd_img_request_get(orig_request->img_request);
>  	obj_request_done_set(orig_request);
>  	rbd_obj_request_complete(orig_request);
>  }
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback()
  2016-09-19 17:03 ` [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback() Ilya Dryomov
@ 2016-09-25 16:02   ` Alex Elder
  2016-09-26 12:58   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: Alex Elder @ 2016-09-25 16:02 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> - fix parent_length == img_request->xferred assert to not fire on
>   copyup read failures
> - don't leak pages if copyup read fails or we can't allocate a new osd
>   request
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Looks good.

Reviewed-by: Alex Elder <elder@linaro.org>

> ---
>  drivers/block/rbd.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index d8070bd29fd1..b9a4e79a663f 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -2666,7 +2666,7 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
>  	rbd_assert(obj_request_type_valid(orig_request->type));
>  	img_result = img_request->result;
>  	parent_length = img_request->length;
> -	rbd_assert(parent_length == img_request->xferred);
> +	rbd_assert(img_result || parent_length == img_request->xferred);
>  	rbd_img_request_put(img_request);
>  
>  	rbd_assert(orig_request->img_request);
> @@ -2719,6 +2719,7 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
>  	return;
>  
>  out_err:
> +	ceph_release_page_vector(pages, page_count);
>  	orig_request->result = img_result;
>  	orig_request->xferred = 0;
>  	rbd_img_request_get(orig_request->img_request);
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths
  2016-09-19 17:03 ` [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths Ilya Dryomov
@ 2016-09-25 16:30   ` Alex Elder
  2016-09-26 12:05   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: Alex Elder @ 2016-09-25 16:30 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> - don't put obj_request if rbd_obj_request_create() fails

Not only that, don't leak the page vector in that case either.

> - don't leak stat_request if rbd_osd_req_create() fails

I believe the reference that was being dropped at the end of
the function was intended to match the one taken for the
assignment of stat_request->obj_request.  But clearly the
error path wasn't right.  Your new code no longer has an
error path that requires dropping that reference.  It is
dropped near the top of rbd_img_obj_exists_callback().

In your new code you drop a reference to the stat request,
but that's to match the one taken when that request was
created.

> Reported-by: David Disseldorp <ddiss@suse.de>
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

This looks good.

Reviewed-by: Alex Elder <elder@linaro.org>

> ---
>  drivers/block/rbd.c | 42 ++++++++++++++++++++++--------------------
>  1 file changed, 22 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index b9a4e79a663f..03f171067e08 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -2886,11 +2886,23 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
>  {
>  	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
>  	struct rbd_obj_request *stat_request;
> -	struct page **pages = NULL;
> +	struct page **pages;
>  	u32 page_count;
>  	size_t size;
>  	int ret;
>  
> +	stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
> +					      OBJ_REQUEST_PAGES);
> +	if (!stat_request)
> +		return -ENOMEM;
> +
> +	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
> +						   stat_request);
> +	if (!stat_request->osd_req) {
> +		ret = -ENOMEM;
> +		goto fail_stat_request;
> +	}
> +
>  	/*
>  	 * The response data for a STAT call consists of:
>  	 *     le64 length;
> @@ -2902,38 +2914,28 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
>  	size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
>  	page_count = (u32)calc_pages_for(0, size);
>  	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
> -	if (IS_ERR(pages))
> -		return PTR_ERR(pages);
> +	if (IS_ERR(pages)) {
> +		ret = PTR_ERR(pages);
> +		goto fail_stat_request;
> +	}
>  
> -	ret = -ENOMEM;
> -	stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
> -							OBJ_REQUEST_PAGES);
> -	if (!stat_request)
> -		goto out;
> +	osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
> +	osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
> +				     false, false);
>  
>  	rbd_obj_request_get(obj_request);
>  	stat_request->obj_request = obj_request;
>  	stat_request->pages = pages;
>  	stat_request->page_count = page_count;
> -
> -	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
> -						   stat_request);
> -	if (!stat_request->osd_req)
> -		goto out;
>  	stat_request->callback = rbd_img_obj_exists_callback;
>  
> -	osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
> -	osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
> -					false, false);
>  	rbd_osd_req_format_read(stat_request);
>  
>  	rbd_obj_request_submit(stat_request);
>  	return 0;
>  
> -out:
> -	if (ret)
> -		rbd_obj_request_put(obj_request);
> -
> +fail_stat_request:
> +	rbd_obj_request_put(stat_request);
>  	return ret;
>  }
>  
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests
  2016-09-19 17:03 ` [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests Ilya Dryomov
@ 2016-09-25 16:44   ` Alex Elder
  2016-09-26 16:37     ` Ilya Dryomov
  2016-09-26 12:05   ` David Disseldorp
  1 sibling, 1 reply; 33+ messages in thread
From: Alex Elder @ 2016-09-25 16:44 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> Accessing obj_request->img_request union field is only valid for object
> requests associated with an image (i.e. if obj_request_img_data_test()
> returns true).  rbd_osd_req_format_read() used to do more, but now it
> just sets osd_req->snap_id.  Standalone and stat object requests always
> go to the HEAD revision and are fine with CEPH_NOSNAP set by libceph,
> so get around the invalid union field use by simply not calling
> rbd_osd_req_format_read() in those places.

Since rbd_osd_req_format_{read,write} are now only called
from rbd_img_obj_request_fill(), and they're really simple,
maybe they could be eliminated entirely and open-coded
instead.  We already have the img_request and osd_req pointers
in the caller's context.

Stat requests go to the HEAD because they are only involved for
a layered write, which *cannot* be a snapshot.  I suppose that
is something that could have been (or be) asserted.

Is it guaranteed/required that method calls go to the HEAD?
I'm sure they all do now, but my question is whether there's
a good reason that it will *always* be true.  Maybe it is.

Similarly, rbd_obj_read_sync() is currently used only for
standalone object requests.  But it's conceivable it could
be useful for image objects.  (I'm not too concerned about
that though...)

Anwyay, this looks fine.  My reservations are all about
ensuring these assumptions are clear in the code.  Maybe
a comment or two is warranted.

In any case...

Reviewed-by: Alex Elder <elder@linaro.org>

> Reported-by: David Disseldorp <ddiss@suse.de>
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
>  drivers/block/rbd.c | 9 ++-------
>  1 file changed, 2 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 03f171067e08..8907ee6342ac 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -1943,11 +1943,10 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
>  
>  static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
>  {
> -	struct rbd_img_request *img_request = obj_request->img_request;
>  	struct ceph_osd_request *osd_req = obj_request->osd_req;
>  
> -	if (img_request)
> -		osd_req->r_snapid = img_request->snap_id;
> +	rbd_assert(obj_request_img_data_test(obj_request));
> +	osd_req->r_snapid = obj_request->img_request->snap_id;
>  }
>  
>  static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
> @@ -2929,8 +2928,6 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
>  	stat_request->page_count = page_count;
>  	stat_request->callback = rbd_img_obj_exists_callback;
>  
> -	rbd_osd_req_format_read(stat_request);
> -
>  	rbd_obj_request_submit(stat_request);
>  	return 0;
>  
> @@ -4026,7 +4023,6 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
>  	osd_req_op_cls_response_data_pages(obj_request->osd_req, 0,
>  					obj_request->pages, inbound_size,
>  					0, false, false);
> -	rbd_osd_req_format_read(obj_request);
>  
>  	rbd_obj_request_submit(obj_request);
>  	ret = rbd_obj_request_wait(obj_request);
> @@ -4265,7 +4261,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
>  					obj_request->length,
>  					obj_request->offset & ~PAGE_MASK,
>  					false, false);
> -	rbd_osd_req_format_read(obj_request);
>  
>  	rbd_obj_request_submit(obj_request);
>  	ret = rbd_obj_request_wait(obj_request);
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 8/8] rbd: img_data requests don't own their page array
  2016-09-19 17:03 ` [PATCH 8/8] rbd: img_data requests don't own their page array Ilya Dryomov
@ 2016-09-25 17:06   ` Alex Elder
  2016-09-26 15:33     ` David Disseldorp
  0 siblings, 1 reply; 33+ messages in thread
From: Alex Elder @ 2016-09-25 17:06 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel

On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> Move the check into rbd_obj_request_destroy(), to avoid use-after-free
> on errors in rbd_img_request_fill().
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Is this because an error occurring in rbd_img_request_fill()
causes rbd_img_obj_request_del() to be called, which leads to
rbd_obj_request_destroy(), which (by this time) has not yet
had its obj_request->pages pointer set to NULL because the
object request is still outstanding?  (Your explanation was
a little brief...)

The change in rbd_obj_request_destroy() looks good for that
case.

The code deleted in rbd_img_obj_end_request() could still stay,
however.  Almost everywhere, pointers are reassigned to NULL
when it's known the thing referred to is no longer needed.
It's useful in post-mortem understanding of what's occurred.

I guess it's fine with me either way.

Reviewed-by: Alex Elder <elder@linaro.org>

> ---
>  drivers/block/rbd.c | 11 +++--------
>  1 file changed, 3 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 8907ee6342ac..d305b9ebe2cf 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -2139,7 +2139,9 @@ static void rbd_obj_request_destroy(struct kref *kref)
>  			bio_chain_put(obj_request->bio_list);
>  		break;
>  	case OBJ_REQUEST_PAGES:
> -		if (obj_request->pages)
> +		/* img_data requests don't own their page array */
> +		if (obj_request->pages &&
> +		    !obj_request_img_data_test(obj_request))
>  			ceph_release_page_vector(obj_request->pages,
>  						obj_request->page_count);
>  		break;
> @@ -2360,13 +2362,6 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
>  		xferred = obj_request->length;
>  	}
>  
> -	/* Image object requests don't own their page array */
> -
> -	if (obj_request->type == OBJ_REQUEST_PAGES) {
> -		obj_request->pages = NULL;
> -		obj_request->page_count = 0;
> -	}
> -
>  	if (img_request_child_test(img_request)) {
>  		rbd_assert(img_request->obj_request != NULL);
>  		more = obj_request->which < img_request->obj_request_count - 1;
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests
  2016-09-19 17:03 ` [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests Ilya Dryomov
  2016-09-25 16:44   ` Alex Elder
@ 2016-09-26 12:05   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: David Disseldorp @ 2016-09-26 12:05 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel, Alex Elder

On Mon, 19 Sep 2016 19:03:27 +0200, Ilya Dryomov wrote:

> Accessing obj_request->img_request union field is only valid for object
> requests associated with an image (i.e. if obj_request_img_data_test()
> returns true).  rbd_osd_req_format_read() used to do more, but now it
> just sets osd_req->snap_id.  Standalone and stat object requests always
> go to the HEAD revision and are fine with CEPH_NOSNAP set by libceph,
> so get around the invalid union field use by simply not calling
> rbd_osd_req_format_read() in those places.
> 
> Reported-by: David Disseldorp <ddiss@suse.de>
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Looks fine.
Reviewed-by: David Disseldorp <ddiss@suse.de>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths
  2016-09-19 17:03 ` [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths Ilya Dryomov
  2016-09-25 16:30   ` Alex Elder
@ 2016-09-26 12:05   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: David Disseldorp @ 2016-09-26 12:05 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel, Alex Elder

On Mon, 19 Sep 2016 19:03:26 +0200, Ilya Dryomov wrote:

> - don't put obj_request if rbd_obj_request_create() fails
> - don't leak stat_request if rbd_osd_req_create() fails
> 
> Reported-by: David Disseldorp <ddiss@suse.de>
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Looks good.
Reviewed-by: David Disseldorp <ddiss@suse.de>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/8] rbd: mark the original request as done if stat request fails
  2016-09-23 21:39   ` Alex Elder
@ 2016-09-26 12:28     ` Ilya Dryomov
  0 siblings, 0 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-26 12:28 UTC (permalink / raw)
  To: Alex Elder; +Cc: Ceph Development

On Fri, Sep 23, 2016 at 11:39 PM, Alex Elder <elder@linaro.org> wrote:
> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>> If stat request fails with something other than -ENOENT (which just
>> means that we need to copyup), the original object request is never
>> marked as done and therefore never completed.  Fix this by moving the
>> mark done + complete snippet from rbd_img_obj_parent_read_full() into
>> rbd_img_obj_exists_callback().  The former remains covered, as the
>> latter is its only caller (through rbd_img_obj_request_submit()).
>>
>> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
>
> Did you ever see this happen?

No, this series was all just staring at the surroundings of
rbd_img_obj_exists_submit(), prompted by David's patches.

>
> This little block of code (setting an error and marking a
> request done) is found in two other spots in the code; maybe
> it could be encapsulated.  (That wouldn't have helped in
> this case, however.)

I'll go ahead and pull it into a new rbd_obj_request_error().

>
> It took me a while to refresh on all the moving parts here.
> Let me explain what I see.
>
> No matter what, when an object request gets submitted, it
> must eventually have its result code set, get marked done,
> and be completed by a call to rbd_obj_request_complete().
>
> An image object gets submitted by rbd_img_obj_request_submit().
> Initially, rbd_img_request_submit() calls this for every object
> request in the image request.  If any errors occur here, the
> image request is dropped, and in the process of cleaning up
> the image request, its reference to its objects are dropped as
> well.  (Here it's possible some requests are never submitted,
> and therefore never marked done...  But I think that's OK.)
>
> There is one other place rbd_img_obj_request_submit() gets
> called.  Before getting to that, let's look at what it does.
>
> Simple image object requests (reads, or non-layered writes,
> or layered writes that are known to not overlap the parent)
> require no special treatment.  The object request is submitted,
> and it will eventually be marked done and be completed.
>
> For a layered write request, we need to know whether an image
> object exists, and if we don't know we submit a stat call for
> that object so we can find out.  When that call completes,
> rbd_img_obj_exists_callback() records the result (whether the
> object exists), and then re-submits the original image object
> request.  This is the second spot rbd_img_obj_request_submit()
> is called.  If an error occurs at this stage, we need to mark
> the original request done and complete it.
> --> This is the location of the bug this patch fixes--the
>     original request was not getting marked done in the event
>     of an error.

Correct.

>
> The last case is a call to rbd_img_obj_request_submit() for
> a non-simple request in which we know the target image object
> doesn't exist.  So in that case we issue a read of the data
> in the parent object backing the full (target) image object,
> in rbd_img_obj_parent_read_full().  This is necessary so that
> data can be supplied to the target OSD in a copyup request.
> Previously, if an error occurred calling that function, the
> original image object request would be marked done and completed.
> Otherwise, that parent image would, when complete, cause
> rbd_img_obj_parent_read_full_callback() to be called.  This
> patch changes that (discussed below).
>
> If an error occurs in rbd_img_obj_parent_read_full_callback(),
> that function marks the original image object request done
> and completes it.  Otherwise the original request is converted
> into a copyup request, and that gets submitted to the original
> image object.  Eventually rbd_osd_copyup_callback() is called,
> which marks the original request done, and as a result,
> rbd_osd_req_callback() completes that request.
>
> This covers all the cases.
>
> Now about the change...
>
> In in rbd_img_obj_parent_read_full() you have eliminated
> setting the object request result, marking it done, and
> completing it.  Now that function will simply return an
> error if one occurs.  The only caller of that function is
> rbd_img_obj_request_submit(), and as laid out above, we
> know an error occurring there leads to the original image
> object request being marked done and completed.
>
> Or rather, it's done properly provided the fix for the
> bug in rbd_img_obj_exists_callback() is in place.
>
> So I guess I'd say this looks good, and in summary:
>
> Reviewed-by: Alex Elder <elder@linaro.org>
>
> I have one minor suggestion below.
>
>> ---
>>  drivers/block/rbd.c | 24 +++++++++++++-----------
>>  1 file changed, 13 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 027e0817a118..b247200a0f28 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -2805,10 +2805,6 @@ out_err:
>>               ceph_release_page_vector(pages, page_count);
>>       if (parent_request)
>>               rbd_img_request_put(parent_request);
>> -     obj_request->result = result;
>> -     obj_request->xferred = 0;
>> -     obj_request_done_set(obj_request);
>> -
>
> You should change the comment at the top of this function so
> it indicates that it is no longer this function that takes
> care of passing the error on to the original image object
> request.

Done.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/8] rbd: change rbd_obj_request_submit() signature
  2016-09-19 17:03 ` [PATCH 1/8] rbd: change rbd_obj_request_submit() signature Ilya Dryomov
  2016-09-23 15:38   ` Alex Elder
@ 2016-09-26 12:38   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: David Disseldorp @ 2016-09-26 12:38 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel, Alex Elder

On Mon, 19 Sep 2016 19:03:21 +0200, Ilya Dryomov wrote:

> - osdc parameter is useless
> - starting with commit 5aea3dcd5021 ("libceph: a major OSD client
>   update"), ceph_osdc_start_request() always returns success
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Looks good.
Reviewed-by: David Disseldorp <ddiss@suse.de>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers
  2016-09-19 17:03 ` [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers Ilya Dryomov
  2016-09-23 15:57   ` Alex Elder
@ 2016-09-26 12:43   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: David Disseldorp @ 2016-09-26 12:43 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel, Alex Elder

On Mon, 19 Sep 2016 19:03:22 +0200, Ilya Dryomov wrote:

> Assert once in rbd_img_obj_request_submit().
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Looks fine.
Reviewed-by: David Disseldorp <ddiss@suse.de>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback()
  2016-09-19 17:03 ` [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback() Ilya Dryomov
  2016-09-25 16:02   ` Alex Elder
@ 2016-09-26 12:58   ` David Disseldorp
  1 sibling, 0 replies; 33+ messages in thread
From: David Disseldorp @ 2016-09-26 12:58 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel, Alex Elder

On Mon, 19 Sep 2016 19:03:25 +0200, Ilya Dryomov wrote:

> - fix parent_length == img_request->xferred assert to not fire on
>   copyup read failures
> - don't leak pages if copyup read fails or we can't allocate a new osd
>   request
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Looks fine.
Reviewed-by: David Disseldorp <ddiss@suse.de>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit()
  2016-09-25 15:56   ` Alex Elder
@ 2016-09-26 14:38     ` Ilya Dryomov
  2016-09-28  0:22       ` Alex Elder
  0 siblings, 1 reply; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-26 14:38 UTC (permalink / raw)
  To: Alex Elder; +Cc: Ceph Development

On Sun, Sep 25, 2016 at 5:56 PM, Alex Elder <elder@linaro.org> wrote:
> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>> Commit 0f2d5be792b0 ("rbd: use reference counts for image requests")
>> added rbd_img_request_get(), which rbd_img_request_fill() calls for
>> each obj_request added to img_request.  It was an urgent band-aid for
>> the uglyness that is rbd_img_obj_callback() and none of the error paths
>> were updated.
>
> I'm not sure how to improve that function.  Object requests in
> an image request need to be completed in sequential order,
> because blk_update_request() requires that.  (Or maybe you
> were referring to something else you find ugly.)

Quoting 0f2d5be792b0 commit message:

"There is a race here, however.  The instant an object request is
marked "done" it can be provided (by a thread handling completion of
one of its predecessor operations) to rbd_img_obj_end_request(),
which (for the last request) can then lead to the image request
getting torn down.  And this can happen *before* that object has
itself entered rbd_img_obj_end_request().  As a result, once it
*does* enter that function, the image request (and even the object
request itself) may have been freed and become invalid."

"... we don't want rbd_img_request_complete() to necessarily
result in the image request being destroyed, because it may
get called before we've finished processing on all of its
object requests."

This "I'm called for obj_request1, but I'll also complete obj_request2
along with the entire img_request if I get a chance, before I'm called
for obj_request2" behaviour is *really* *really* sneaky.

I think a simple atomic, decremented for each obj_request completion
would do.  When it hits 0, we say "OK, this img_request is finished"
and call rbd_img_obj_end_request() for each obj_request in order.  I'm
not sure we are getting anything from calling blk_update_request() in
this fashion, at the expense of hard to comprehend buggy code.

>
>> Given that this img_request reference is meant to represent an
>> obj_request that hasn't passed through rbd_img_obj_callback() yet,
>
> It was, for the purpose of that commit.  But I don't like to think
> of reference counting that way.  I usually try to reason about
> reference counts as actual counts of pointers referring to objects.
> And in that sense, I think it might have been better in the first
> place to drop the an object request's reference to the image request
> in rbd_img_obj_request_del(), where the img_request pointer gets
> nulled out.  And move the call to rbd_img_request_get() for an
> object request into rbd_img_obj_request_add(), where the pointer
> gets assigned.  I don't know why I didn't do it that way before;
> like you said it was sort of an urgently developed change.
>
> I haven't investigated this alternative exhaustively, but if
> it works I think it would be a cleaner fix.

That won't work, exactly because this reference is not a normal
reference, but a band-aid for what I quoted.  rbd_img_obj_request_del()
is called from rbd_img_request_put() when refcount hits 0, which it
never will because of at least one object request holding it.  For this
to work, object requests have to be deleted explicitly.

>
>> proper cleanup in appropriate destructors is a challenge.  However,
>> noting that if we don't get a chance to call rbd_obj_request_complete(),
>> there is not going to be a call to rbd_img_obj_callback(), we can move
>> rbd_img_request_get() into rbd_obj_request_submit() and fixup the two
>> places that call rbd_obj_request_complete() directly and not through
>> rbd_obj_request_submit() to temporarily bump img_request, so that
>> rbd_img_obj_callback() can put as usual.
>>
>> This takes care of img_request leaks on errors on the submit side.
>
> So *this* is the problem you're aiming to solve here...  It would
> have been nice for you to call attention to where these leaks were
> occurring.  (It seems rbd_img_obj_parent_read_full_callback()
> and rbd_img_obj_exists_callback().)

By "on the submit side", I meant everywhere where we error out before
rbd_obj_request_submit() on the original request.

>
> It looks to me like your change is sound, but again, I think it
> would be better to make the reference counting match the actual
> recording of references to objects as I mentioned before.  I'm
> going to ask for your opinion on that before I offer you my
> "reviewed by" on this.

The aim of this patch was to fix up error paths while preserving the
semantics of this kref.  Any other improvements can only come with the
full re-write of the completion code - it's too fragile.

>
>                                         -Alex
>
> And now, some more gratuitous analysis...
>
> We currently (without this patch) get a reference to an image request
> for every object request populated in rbd_img_request_fill(), right
> after calling rbd_img_obj_request_fill().  We want each of these to
> have a matching reference drop, once that object request is completed,
> and the object request no longer has any use for what's in the image
> request.  Right now that occurs in rbd_img_obj_callback().  But that
> function is only called if an image object request gets successfully
> submitted.  And in particular, if an error occurs while processing
> a layered write--either while doing an existence check for an object
> or when doing a read of the parent data--the original image object
> requests never get submitted, and therefore their references to the
> image don't get dropped properly.  This issue is the subject of your
> patch.  All other image object requests appear to get submitted,
> and therefore properly release their reference.

Correct.  There is also the issue of two-obj_request img_requests,
where the first obj_request gets submitted while the second errors out.
This patch captures it, but there may well be other issues there -
I haven't gone down that rabbit hole yet...

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 8/8] rbd: img_data requests don't own their page array
  2016-09-25 17:06   ` Alex Elder
@ 2016-09-26 15:33     ` David Disseldorp
  2016-09-26 17:25       ` Ilya Dryomov
  0 siblings, 1 reply; 33+ messages in thread
From: David Disseldorp @ 2016-09-26 15:33 UTC (permalink / raw)
  To: Alex Elder, Ilya Dryomov; +Cc: ceph-devel

On Sun, 25 Sep 2016 12:06:34 -0500, Alex Elder wrote:

> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
> > Move the check into rbd_obj_request_destroy(), to avoid use-after-free
> > on errors in rbd_img_request_fill().
> > 
> > Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> 
> Is this because an error occurring in rbd_img_request_fill()
> causes rbd_img_obj_request_del() to be called, which leads to
> rbd_obj_request_destroy(), which (by this time) has not yet
> had its obj_request->pages pointer set to NULL because the
> object request is still outstanding?  (Your explanation was
> a little brief...)

I agree, the commit message could be improved...

I think the use after free is in the rbd_img_obj_parent_read_full()
call path - if rbd_img_request_fill() fails after adding an obj_request
to the img_request->obj_requests list, then the corresponding page(s)
will be freed in the rbd_img_request_fill() out_unwind error path *and*
the rbd_img_obj_parent_read_full() error path.

> The change in rbd_obj_request_destroy() looks good for that
> case.
> 
> The code deleted in rbd_img_obj_end_request() could still stay,
> however.  Almost everywhere, pointers are reassigned to NULL
> when it's known the thing referred to is no longer needed.
> It's useful in post-mortem understanding of what's occurred.

Agreed.

> I guess it's fine with me either way.
> 
> Reviewed-by: Alex Elder <elder@linaro.org>

Looks fine to me too.
Reviewed-by: David Disseldorp <ddiss@suse.de>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests
  2016-09-25 16:44   ` Alex Elder
@ 2016-09-26 16:37     ` Ilya Dryomov
  2016-09-27 17:11       ` Alex Elder
  0 siblings, 1 reply; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-26 16:37 UTC (permalink / raw)
  To: Alex Elder; +Cc: Ceph Development

On Sun, Sep 25, 2016 at 6:44 PM, Alex Elder <elder@linaro.org> wrote:
> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>> Accessing obj_request->img_request union field is only valid for object
>> requests associated with an image (i.e. if obj_request_img_data_test()
>> returns true).  rbd_osd_req_format_read() used to do more, but now it
>> just sets osd_req->snap_id.  Standalone and stat object requests always
>> go to the HEAD revision and are fine with CEPH_NOSNAP set by libceph,
>> so get around the invalid union field use by simply not calling
>> rbd_osd_req_format_read() in those places.
>
> Since rbd_osd_req_format_{read,write} are now only called
> from rbd_img_obj_request_fill(), and they're really simple,
> maybe they could be eliminated entirely and open-coded
> instead.  We already have the img_request and osd_req pointers
> in the caller's context.
>
> Stat requests go to the HEAD because they are only involved for
> a layered write, which *cannot* be a snapshot.  I suppose that
> is something that could have been (or be) asserted.

The only reason it's working is img_request->snap_id has the same
offset as obj_request->img_request and we end up setting stat snap_id
to a kernel pointer.  A snapshot with an id like 0xffff...  can't
practically exist, so we get the HEAD response from the OSDs.

>
> Is it guaranteed/required that method calls go to the HEAD?
> I'm sure they all do now, but my question is whether there's
> a good reason that it will *always* be true.  Maybe it is.
>
> Similarly, rbd_obj_read_sync() is currently used only for
> standalone object requests.  But it's conceivable it could
> be useful for image objects.  (I'm not too concerned about
> that though...)
>
> Anwyay, this looks fine.  My reservations are all about
> ensuring these assumptions are clear in the code.  Maybe
> a comment or two is warranted.

libceph initializes snap_id to CEPH_NOSNAP - that's the API.  If the
client isn't setting it, the assumption is clear - HEAD.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 8/8] rbd: img_data requests don't own their page array
  2016-09-26 15:33     ` David Disseldorp
@ 2016-09-26 17:25       ` Ilya Dryomov
  0 siblings, 0 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-26 17:25 UTC (permalink / raw)
  To: David Disseldorp; +Cc: Alex Elder, Ceph Development

On Mon, Sep 26, 2016 at 5:33 PM, David Disseldorp <ddiss@suse.de> wrote:
> On Sun, 25 Sep 2016 12:06:34 -0500, Alex Elder wrote:
>
>> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>> > Move the check into rbd_obj_request_destroy(), to avoid use-after-free
>> > on errors in rbd_img_request_fill().
>> >
>> > Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
>>
>> Is this because an error occurring in rbd_img_request_fill()
>> causes rbd_img_obj_request_del() to be called, which leads to
>> rbd_obj_request_destroy(), which (by this time) has not yet
>> had its obj_request->pages pointer set to NULL because the
>> object request is still outstanding?  (Your explanation was
>> a little brief...)
>
> I agree, the commit message could be improved...
>
> I think the use after free is in the rbd_img_obj_parent_read_full()
> call path - if rbd_img_request_fill() fails after adding an obj_request
> to the img_request->obj_requests list, then the corresponding page(s)
> will be freed in the rbd_img_request_fill() out_unwind error path *and*
> the rbd_img_obj_parent_read_full() error path.

Correct.  The same goes for rbd_img_request_fill(OBJ_REQUEST_PAGES)
call in rbd_img_parent_read().  I'll update the commit message.

>
>> The change in rbd_obj_request_destroy() looks good for that
>> case.
>>
>> The code deleted in rbd_img_obj_end_request() could still stay,
>> however.  Almost everywhere, pointers are reassigned to NULL
>> when it's known the thing referred to is no longer needed.
>> It's useful in post-mortem understanding of what's occurred.
>
> Agreed.

I disagree.  Yes, it sometimes helps if you do it systematically and
all you have is a panic message.  But if you have a vmcore and trying
to do an actual post-mortem, NULLed out pointers tend to work against
you, especially if you NULL out something that hasn't just been freed.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests
  2016-09-26 16:37     ` Ilya Dryomov
@ 2016-09-27 17:11       ` Alex Elder
  0 siblings, 0 replies; 33+ messages in thread
From: Alex Elder @ 2016-09-27 17:11 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Ceph Development

On 09/26/2016 11:37 AM, Ilya Dryomov wrote:
> On Sun, Sep 25, 2016 at 6:44 PM, Alex Elder <elder@linaro.org> wrote:
>> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>>> Accessing obj_request->img_request union field is only valid for object
>>> requests associated with an image (i.e. if obj_request_img_data_test()
>>> returns true).  rbd_osd_req_format_read() used to do more, but now it
>>> just sets osd_req->snap_id.  Standalone and stat object requests always
>>> go to the HEAD revision and are fine with CEPH_NOSNAP set by libceph,
>>> so get around the invalid union field use by simply not calling
>>> rbd_osd_req_format_read() in those places.
>>
>> Since rbd_osd_req_format_{read,write} are now only called
>> from rbd_img_obj_request_fill(), and they're really simple,
>> maybe they could be eliminated entirely and open-coded
>> instead.  We already have the img_request and osd_req pointers
>> in the caller's context.
>>
>> Stat requests go to the HEAD because they are only involved for
>> a layered write, which *cannot* be a snapshot.  I suppose that
>> is something that could have been (or be) asserted.
> 
> The only reason it's working is img_request->snap_id has the same
> offset as obj_request->img_request and we end up setting stat snap_id

Oh wow.  I wasn't even thinking about that.  I was focusing on
the fact that you're no longer calling rbd_osd_req_format_read()
in those two places.  (Looking again, I see the first sentence
in your description talks about the invalid use of that field.)
So yes, clearly we need to test the IMG_DATA flag before using
the image request pointer.

Aside from that what I said is still true.  Any write *must*
be going to a HEAD object.  And yes, the standalone object
requests go to the HEAD object, but there's nothing that
really requires that

Thanks for the explanation.  This is a good fix.

					-Alex

> to a kernel pointer.  A snapshot with an id like 0xffff...  can't
> practically exist, so we get the HEAD response from the OSDs.
> 
>>
>> Is it guaranteed/required that method calls go to the HEAD?
>> I'm sure they all do now, but my question is whether there's
>> a good reason that it will *always* be true.  Maybe it is.
>>
>> Similarly, rbd_obj_read_sync() is currently used only for
>> standalone object requests.  But it's conceivable it could
>> be useful for image objects.  (I'm not too concerned about
>> that though...)
>>
>> Anwyay, this looks fine.  My reservations are all about
>> ensuring these assumptions are clear in the code.  Maybe
>> a comment or two is warranted.
> 
> libceph initializes snap_id to CEPH_NOSNAP - that's the API.  If the
> client isn't setting it, the assumption is clear - HEAD.
> 
> Thanks,
> 
>                 Ilya
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit()
  2016-09-26 14:38     ` Ilya Dryomov
@ 2016-09-28  0:22       ` Alex Elder
  2016-09-29 15:15         ` Ilya Dryomov
  0 siblings, 1 reply; 33+ messages in thread
From: Alex Elder @ 2016-09-28  0:22 UTC (permalink / raw)
  To: Ilya Dryomov, Alex Elder; +Cc: Ceph Development

On 09/26/2016 09:38 AM, Ilya Dryomov wrote:
> On Sun, Sep 25, 2016 at 5:56 PM, Alex Elder <elder@linaro.org> wrote:
>> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>>> Commit 0f2d5be792b0 ("rbd: use reference counts for image requests")
>>> added rbd_img_request_get(), which rbd_img_request_fill() calls for
>>> each obj_request added to img_request.  It was an urgent band-aid for
>>> the uglyness that is rbd_img_obj_callback() and none of the error paths
>>> were updated.
>>
>> I'm not sure how to improve that function.  Object requests in
>> an image request need to be completed in sequential order,
>> because blk_update_request() requires that.  (Or maybe you
>> were referring to something else you find ugly.)
> 
> Quoting 0f2d5be792b0 commit message:
> 
> "There is a race here, however.  The instant an object request is
> marked "done" it can be provided (by a thread handling completion of
> one of its predecessor operations) to rbd_img_obj_end_request(),
> which (for the last request) can then lead to the image request
> getting torn down.  And this can happen *before* that object has
> itself entered rbd_img_obj_end_request().  As a result, once it
> *does* enter that function, the image request (and even the object
> request itself) may have been freed and become invalid."
> 
> "... we don't want rbd_img_request_complete() to necessarily
> result in the image request being destroyed, because it may
> get called before we've finished processing on all of its
> object requests."
> 
> This "I'm called for obj_request1, but I'll also complete obj_request2
> along with the entire img_request if I get a chance, before I'm called
> for obj_request2" behaviour is *really* *really* sneaky.

OK, I understand that.  (And I think you meant "...before I'm
called for obj_request1".)

> I think a simple atomic, decremented for each obj_request completion
> would do.  When it hits 0, we say "OK, this img_request is finished"
> and call rbd_img_obj_end_request() for each obj_request in order.  I'm
> not sure we are getting anything from calling blk_update_request() in
> this fashion, at the expense of hard to comprehend buggy code.

OK, now you're talking about something different, which is to
change how we handle image object request completions.
Practically speaking, I think you're right that there would
be very little lost by handling an image request's object
request completions all at once rather than as quickly as
possible.

I think the get of the img_request in the error paths looks
a little funny.  It'll now be encapsulated in another function,
so maybe you can add a small comment there to say why you need
to do that--to match what the submit path does (or to match
the reference dropped in the completion path).

I have no argument with what you're doing, and I unfortunately
feel less confident (maybe 10% less) in my walk-through of
all the affected code than I normally am on your patches...
But I'll call that good enough.  I only have small blocks of
time right now and I don't want to delay things.

Reviewed-by: Alex Elder <elder@linaro.org>

>>> Given that this img_request reference is meant to represent an
>>> obj_request that hasn't passed through rbd_img_obj_callback() yet,
>>
>> It was, for the purpose of that commit.  But I don't like to think
>> of reference counting that way.  I usually try to reason about
>> reference counts as actual counts of pointers referring to objects.
>> And in that sense, I think it might have been better in the first
>> place to drop the an object request's reference to the image request
>> in rbd_img_obj_request_del(), where the img_request pointer gets
>> nulled out.  And move the call to rbd_img_request_get() for an
>> object request into rbd_img_obj_request_add(), where the pointer
>> gets assigned.  I don't know why I didn't do it that way before;
>> like you said it was sort of an urgently developed change.
>>
>> I haven't investigated this alternative exhaustively, but if
>> it works I think it would be a cleaner fix.
> 
> That won't work, exactly because this reference is not a normal
> reference, but a band-aid for what I quoted.  rbd_img_obj_request_del()
> is called from rbd_img_request_put() when refcount hits 0, which it
> never will because of at least one object request holding it.  For this
> to work, object requests have to be deleted explicitly.
> 
>>
>>> proper cleanup in appropriate destructors is a challenge.  However,
>>> noting that if we don't get a chance to call rbd_obj_request_complete(),
>>> there is not going to be a call to rbd_img_obj_callback(), we can move
>>> rbd_img_request_get() into rbd_obj_request_submit() and fixup the two
>>> places that call rbd_obj_request_complete() directly and not through
>>> rbd_obj_request_submit() to temporarily bump img_request, so that
>>> rbd_img_obj_callback() can put as usual.
>>>
>>> This takes care of img_request leaks on errors on the submit side.
>>
>> So *this* is the problem you're aiming to solve here...  It would
>> have been nice for you to call attention to where these leaks were
>> occurring.  (It seems rbd_img_obj_parent_read_full_callback()
>> and rbd_img_obj_exists_callback().)
> 
> By "on the submit side", I meant everywhere where we error out before
> rbd_obj_request_submit() on the original request.
> 
>>
>> It looks to me like your change is sound, but again, I think it
>> would be better to make the reference counting match the actual
>> recording of references to objects as I mentioned before.  I'm
>> going to ask for your opinion on that before I offer you my
>> "reviewed by" on this.
> 
> The aim of this patch was to fix up error paths while preserving the
> semantics of this kref.  Any other improvements can only come with the
> full re-write of the completion code - it's too fragile.
> 
>>
>>                                         -Alex
>>
>> And now, some more gratuitous analysis...
>>
>> We currently (without this patch) get a reference to an image request
>> for every object request populated in rbd_img_request_fill(), right
>> after calling rbd_img_obj_request_fill().  We want each of these to
>> have a matching reference drop, once that object request is completed,
>> and the object request no longer has any use for what's in the image
>> request.  Right now that occurs in rbd_img_obj_callback().  But that
>> function is only called if an image object request gets successfully
>> submitted.  And in particular, if an error occurs while processing
>> a layered write--either while doing an existence check for an object
>> or when doing a read of the parent data--the original image object
>> requests never get submitted, and therefore their references to the
>> image don't get dropped properly.  This issue is the subject of your
>> patch.  All other image object requests appear to get submitted,
>> and therefore properly release their reference.
> 
> Correct.  There is also the issue of two-obj_request img_requests,
> where the first obj_request gets submitted while the second errors out.
> This patch captures it, but there may well be other issues there -
> I haven't gone down that rabbit hole yet...
> 
> Thanks,
> 
>                 Ilya
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit()
  2016-09-28  0:22       ` Alex Elder
@ 2016-09-29 15:15         ` Ilya Dryomov
  0 siblings, 0 replies; 33+ messages in thread
From: Ilya Dryomov @ 2016-09-29 15:15 UTC (permalink / raw)
  To: Alex Elder; +Cc: Alex Elder, Ceph Development

On Wed, Sep 28, 2016 at 2:22 AM, Alex Elder <elder@ieee.org> wrote:
> On 09/26/2016 09:38 AM, Ilya Dryomov wrote:
>> On Sun, Sep 25, 2016 at 5:56 PM, Alex Elder <elder@linaro.org> wrote:
>>> On 09/19/2016 12:03 PM, Ilya Dryomov wrote:
>>>> Commit 0f2d5be792b0 ("rbd: use reference counts for image requests")
>>>> added rbd_img_request_get(), which rbd_img_request_fill() calls for
>>>> each obj_request added to img_request.  It was an urgent band-aid for
>>>> the uglyness that is rbd_img_obj_callback() and none of the error paths
>>>> were updated.
>>>
>>> I'm not sure how to improve that function.  Object requests in
>>> an image request need to be completed in sequential order,
>>> because blk_update_request() requires that.  (Or maybe you
>>> were referring to something else you find ugly.)
>>
>> Quoting 0f2d5be792b0 commit message:
>>
>> "There is a race here, however.  The instant an object request is
>> marked "done" it can be provided (by a thread handling completion of
>> one of its predecessor operations) to rbd_img_obj_end_request(),
>> which (for the last request) can then lead to the image request
>> getting torn down.  And this can happen *before* that object has
>> itself entered rbd_img_obj_end_request().  As a result, once it
>> *does* enter that function, the image request (and even the object
>> request itself) may have been freed and become invalid."
>>
>> "... we don't want rbd_img_request_complete() to necessarily
>> result in the image request being destroyed, because it may
>> get called before we've finished processing on all of its
>> object requests."
>>
>> This "I'm called for obj_request1, but I'll also complete obj_request2
>> along with the entire img_request if I get a chance, before I'm called
>> for obj_request2" behaviour is *really* *really* sneaky.
>
> OK, I understand that.  (And I think you meant "...before I'm
> called for obj_request1".)
>
>> I think a simple atomic, decremented for each obj_request completion
>> would do.  When it hits 0, we say "OK, this img_request is finished"
>> and call rbd_img_obj_end_request() for each obj_request in order.  I'm
>> not sure we are getting anything from calling blk_update_request() in
>> this fashion, at the expense of hard to comprehend buggy code.
>
> OK, now you're talking about something different, which is to
> change how we handle image object request completions.

Yeah, how we handle image object request completions is the root of the
problem here.  It's the reason we have rbd_img_request_get/put() in the
first place.

> Practically speaking, I think you're right that there would
> be very little lost by handling an image request's object
> request completions all at once rather than as quickly as
> possible.
>
> I think the get of the img_request in the error paths looks
> a little funny.  It'll now be encapsulated in another function,
> so maybe you can add a small comment there to say why you need
> to do that--to match what the submit path does (or to match
> the reference dropped in the completion path).

I've just re-pushed with the helper.

>
> I have no argument with what you're doing, and I unfortunately
> feel less confident (maybe 10% less) in my walk-through of
> all the affected code than I normally am on your patches...
> But I'll call that good enough.  I only have small blocks of
> time right now and I don't want to delay things.
>
> Reviewed-by: Alex Elder <elder@linaro.org>

Thanks for the review!

                Ilya

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2016-09-29 15:15 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-19 17:03 [PATCH 0/8] rbd: layering (mostly) error path fixes Ilya Dryomov
2016-09-19 17:03 ` [PATCH 1/8] rbd: change rbd_obj_request_submit() signature Ilya Dryomov
2016-09-23 15:38   ` Alex Elder
2016-09-26 12:38   ` David Disseldorp
2016-09-19 17:03 ` [PATCH 2/8] rbd: clean up asserts in rbd_img_obj_request_submit() helpers Ilya Dryomov
2016-09-23 15:57   ` Alex Elder
2016-09-23 16:07     ` Ilya Dryomov
2016-09-23 16:41       ` Alex Elder
2016-09-26 12:43   ` David Disseldorp
2016-09-19 17:03 ` [PATCH 3/8] rbd: mark the original request as done if stat request fails Ilya Dryomov
2016-09-23 21:39   ` Alex Elder
2016-09-26 12:28     ` Ilya Dryomov
2016-09-19 17:03 ` [PATCH 4/8] rbd: move bumping img_request refcount into rbd_obj_request_submit() Ilya Dryomov
2016-09-25 15:56   ` Alex Elder
2016-09-26 14:38     ` Ilya Dryomov
2016-09-28  0:22       ` Alex Elder
2016-09-29 15:15         ` Ilya Dryomov
2016-09-19 17:03 ` [PATCH 5/8] rbd: don't crash or leak on errors in rbd_img_obj_parent_read_full_callback() Ilya Dryomov
2016-09-25 16:02   ` Alex Elder
2016-09-26 12:58   ` David Disseldorp
2016-09-19 17:03 ` [PATCH 6/8] rbd: rework rbd_img_obj_exists_submit() error paths Ilya Dryomov
2016-09-25 16:30   ` Alex Elder
2016-09-26 12:05   ` David Disseldorp
2016-09-19 17:03 ` [PATCH 7/8] rbd: don't call rbd_osd_req_format_read() for !img_data requests Ilya Dryomov
2016-09-25 16:44   ` Alex Elder
2016-09-26 16:37     ` Ilya Dryomov
2016-09-27 17:11       ` Alex Elder
2016-09-26 12:05   ` David Disseldorp
2016-09-19 17:03 ` [PATCH 8/8] rbd: img_data requests don't own their page array Ilya Dryomov
2016-09-25 17:06   ` Alex Elder
2016-09-26 15:33     ` David Disseldorp
2016-09-26 17:25       ` Ilya Dryomov
2016-09-19 17:06 ` [PATCH 0/8] rbd: layering (mostly) error path fixes Alex Elder

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.