All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] rbd: avoid fast-diff corruption in snapshot-based mirroring
@ 2023-06-05 20:27 Ilya Dryomov
  2023-06-05 20:27 ` [PATCH 1/2] rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting Ilya Dryomov
  2023-06-05 20:27 ` [PATCH 2/2] rbd: get snapshot context after exclusive lock is ensured to be held Ilya Dryomov
  0 siblings, 2 replies; 5+ messages in thread
From: Ilya Dryomov @ 2023-06-05 20:27 UTC (permalink / raw)
  To: ceph-devel; +Cc: Dongsheng Yang

Hello,

This fixes a potential data corruption in the destination image in
differential backup and especially snapshot-based mirroring use cases
with fast-diff enabled.

Thanks,

                Ilya


Ilya Dryomov (2):
  rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting
  rbd: get snapshot context after exclusive lock is ensured to be held

 drivers/block/rbd.c | 62 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 18 deletions(-)

-- 
2.39.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting
  2023-06-05 20:27 [PATCH 0/2] rbd: avoid fast-diff corruption in snapshot-based mirroring Ilya Dryomov
@ 2023-06-05 20:27 ` Ilya Dryomov
  2023-06-06  2:34   ` Dongsheng Yang
  2023-06-05 20:27 ` [PATCH 2/2] rbd: get snapshot context after exclusive lock is ensured to be held Ilya Dryomov
  1 sibling, 1 reply; 5+ messages in thread
From: Ilya Dryomov @ 2023-06-05 20:27 UTC (permalink / raw)
  To: ceph-devel; +Cc: Dongsheng Yang

Move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting into the object request
state machine to allow for the snapshot context to be captured in the
image request state machine rather than in rbd_queue_workfn().

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 84ad3b17956f..6c847db6ee2c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1334,14 +1334,28 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
 /*
  * Must be called after rbd_obj_calc_img_extents().
  */
-static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
+static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
 {
-	if (!obj_req->num_img_extents ||
-	    (rbd_obj_is_entire(obj_req) &&
-	     !obj_req->img_request->snapc->num_snaps))
-		return false;
+	if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
+		dout("%s %p objno %llu discard\n", __func__, obj_req,
+		     obj_req->ex.oe_objno);
+		return;
+	}
 
-	return true;
+	if (!obj_req->num_img_extents) {
+		dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
+		     obj_req->ex.oe_objno);
+		return;
+	}
+
+	if (rbd_obj_is_entire(obj_req) &&
+	    !obj_req->img_request->snapc->num_snaps) {
+		dout("%s %p objno %llu entire\n", __func__, obj_req,
+		     obj_req->ex.oe_objno);
+		return;
+	}
+
+	obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
 }
 
 static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
@@ -2233,9 +2247,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
 	if (ret)
 		return ret;
 
-	if (rbd_obj_copyup_enabled(obj_req))
-		obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
-
 	obj_req->write_state = RBD_OBJ_WRITE_START;
 	return 0;
 }
@@ -2341,8 +2352,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
 	if (ret)
 		return ret;
 
-	if (rbd_obj_copyup_enabled(obj_req))
-		obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
 	if (!obj_req->num_img_extents) {
 		obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
 		if (rbd_obj_is_entire(obj_req))
@@ -3286,6 +3295,7 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
 	case RBD_OBJ_WRITE_START:
 		rbd_assert(!*result);
 
+		rbd_obj_set_copyup_enabled(obj_req);
 		if (rbd_obj_write_is_noop(obj_req))
 			return true;
 
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] rbd: get snapshot context after exclusive lock is ensured to be held
  2023-06-05 20:27 [PATCH 0/2] rbd: avoid fast-diff corruption in snapshot-based mirroring Ilya Dryomov
  2023-06-05 20:27 ` [PATCH 1/2] rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting Ilya Dryomov
@ 2023-06-05 20:27 ` Ilya Dryomov
  2023-06-06  2:34   ` Dongsheng Yang
  1 sibling, 1 reply; 5+ messages in thread
From: Ilya Dryomov @ 2023-06-05 20:27 UTC (permalink / raw)
  To: ceph-devel; +Cc: Dongsheng Yang

Move capturing the snapshot context into the image request state
machine, after exclusive lock is ensured to be held for the duration of
dealing with the image request.  This is needed to ensure correctness
of fast-diff states (OBJECT_EXISTS vs OBJECT_EXISTS_CLEAN) and object
deltas computed based off of them.  Otherwise the object map that is
forked for the snapshot isn't guaranteed to accurately reflect the
contents of the snapshot when the snapshot is taken under I/O.  This
breaks differential backup and snapshot-based mirroring use cases with
fast-diff enabled: since some object deltas may be incomplete, the
destination image may get corrupted.

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/61472
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6c847db6ee2c..632751ddb287 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1336,6 +1336,8 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
  */
 static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
 {
+	rbd_assert(obj_req->img_request->snapc);
+
 	if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
 		dout("%s %p objno %llu discard\n", __func__, obj_req,
 		     obj_req->ex.oe_objno);
@@ -1456,6 +1458,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
 static struct ceph_osd_request *
 rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
 {
+	rbd_assert(obj_req->img_request->snapc);
 	return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
 					 num_ops);
 }
@@ -1592,15 +1595,18 @@ static void rbd_img_request_init(struct rbd_img_request *img_request,
 	mutex_init(&img_request->state_mutex);
 }
 
+/*
+ * Only snap_id is captured here, for reads.  For writes, snapshot
+ * context is captured in rbd_img_object_requests() after exclusive
+ * lock is ensured to be held.
+ */
 static void rbd_img_capture_header(struct rbd_img_request *img_req)
 {
 	struct rbd_device *rbd_dev = img_req->rbd_dev;
 
 	lockdep_assert_held(&rbd_dev->header_rwsem);
 
-	if (rbd_img_is_write(img_req))
-		img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
-	else
+	if (!rbd_img_is_write(img_req))
 		img_req->snap_id = rbd_dev->spec->snap_id;
 
 	if (rbd_dev_parent_get(rbd_dev))
@@ -3482,9 +3488,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
 
 static void rbd_img_object_requests(struct rbd_img_request *img_req)
 {
+	struct rbd_device *rbd_dev = img_req->rbd_dev;
 	struct rbd_obj_request *obj_req;
 
 	rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
+	rbd_assert(!need_exclusive_lock(img_req) ||
+		   __rbd_is_lock_owner(rbd_dev));
+
+	if (rbd_img_is_write(img_req)) {
+		rbd_assert(!img_req->snapc);
+		down_read(&rbd_dev->header_rwsem);
+		img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+		up_read(&rbd_dev->header_rwsem);
+	}
 
 	for_each_obj_request(img_req, obj_req) {
 		int result = 0;
@@ -3502,7 +3518,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
 
 static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
 {
-	struct rbd_device *rbd_dev = img_req->rbd_dev;
 	int ret;
 
 again:
@@ -3523,9 +3538,6 @@ static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
 		if (*result)
 			return true;
 
-		rbd_assert(!need_exclusive_lock(img_req) ||
-			   __rbd_is_lock_owner(rbd_dev));
-
 		rbd_img_object_requests(img_req);
 		if (!img_req->pending.num_pending) {
 			*result = img_req->pending.result;
@@ -3987,6 +3999,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
 {
 	int ret;
 
+	ret = rbd_dev_refresh(rbd_dev);
+	if (ret)
+		return ret;
+
 	if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
 		ret = rbd_object_map_open(rbd_dev);
 		if (ret)
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting
  2023-06-05 20:27 ` [PATCH 1/2] rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting Ilya Dryomov
@ 2023-06-06  2:34   ` Dongsheng Yang
  0 siblings, 0 replies; 5+ messages in thread
From: Dongsheng Yang @ 2023-06-06  2:34 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel



在 2023/6/6 星期二 上午 4:27, Ilya Dryomov 写道:
> Move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting into the object request
> state machine to allow for the snapshot context to be captured in the
> image request state machine rather than in rbd_queue_workfn().
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
> ---
>   drivers/block/rbd.c | 32 +++++++++++++++++++++-----------
>   1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 84ad3b17956f..6c847db6ee2c 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -1334,14 +1334,28 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
>   /*
>    * Must be called after rbd_obj_calc_img_extents().
>    */
> -static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
> +static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
>   {
> -	if (!obj_req->num_img_extents ||
> -	    (rbd_obj_is_entire(obj_req) &&
> -	     !obj_req->img_request->snapc->num_snaps))
> -		return false;
> +	if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
> +		dout("%s %p objno %llu discard\n", __func__, obj_req,
> +		     obj_req->ex.oe_objno);
> +		return;
> +	}
>   
> -	return true;
> +	if (!obj_req->num_img_extents) {
> +		dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
> +		     obj_req->ex.oe_objno);
> +		return;
> +	}
> +
> +	if (rbd_obj_is_entire(obj_req) &&
> +	    !obj_req->img_request->snapc->num_snaps) {
> +		dout("%s %p objno %llu entire\n", __func__, obj_req,
> +		     obj_req->ex.oe_objno);
> +		return;
> +	}
> +
> +	obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
>   }
>   
>   static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
> @@ -2233,9 +2247,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
>   	if (ret)
>   		return ret;
>   
> -	if (rbd_obj_copyup_enabled(obj_req))
> -		obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
> -
>   	obj_req->write_state = RBD_OBJ_WRITE_START;
>   	return 0;
>   }
> @@ -2341,8 +2352,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
>   	if (ret)
>   		return ret;
>   
> -	if (rbd_obj_copyup_enabled(obj_req))
> -		obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
>   	if (!obj_req->num_img_extents) {
>   		obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
>   		if (rbd_obj_is_entire(obj_req))
> @@ -3286,6 +3295,7 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
>   	case RBD_OBJ_WRITE_START:
>   		rbd_assert(!*result);
>   
> +		rbd_obj_set_copyup_enabled(obj_req);
>   		if (rbd_obj_write_is_noop(obj_req))
>   			return true;
>   
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] rbd: get snapshot context after exclusive lock is ensured to be held
  2023-06-05 20:27 ` [PATCH 2/2] rbd: get snapshot context after exclusive lock is ensured to be held Ilya Dryomov
@ 2023-06-06  2:34   ` Dongsheng Yang
  0 siblings, 0 replies; 5+ messages in thread
From: Dongsheng Yang @ 2023-06-06  2:34 UTC (permalink / raw)
  To: Ilya Dryomov, ceph-devel



在 2023/6/6 星期二 上午 4:27, Ilya Dryomov 写道:
> Move capturing the snapshot context into the image request state
> machine, after exclusive lock is ensured to be held for the duration of
> dealing with the image request.  This is needed to ensure correctness
> of fast-diff states (OBJECT_EXISTS vs OBJECT_EXISTS_CLEAN) and object
> deltas computed based off of them.  Otherwise the object map that is
> forked for the snapshot isn't guaranteed to accurately reflect the
> contents of the snapshot when the snapshot is taken under I/O.  This
> breaks differential backup and snapshot-based mirroring use cases with
> fast-diff enabled: since some object deltas may be incomplete, the
> destination image may get corrupted.
> 
> Cc: stable@vger.kernel.org
> Link: https://tracker.ceph.com/issues/61472
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
> ---
>   drivers/block/rbd.c | 30 +++++++++++++++++++++++-------
>   1 file changed, 23 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 6c847db6ee2c..632751ddb287 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -1336,6 +1336,8 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
>    */
>   static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
>   {
> +	rbd_assert(obj_req->img_request->snapc);
> +
>   	if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
>   		dout("%s %p objno %llu discard\n", __func__, obj_req,
>   		     obj_req->ex.oe_objno);
> @@ -1456,6 +1458,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
>   static struct ceph_osd_request *
>   rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
>   {
> +	rbd_assert(obj_req->img_request->snapc);
>   	return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
>   					 num_ops);
>   }
> @@ -1592,15 +1595,18 @@ static void rbd_img_request_init(struct rbd_img_request *img_request,
>   	mutex_init(&img_request->state_mutex);
>   }
>   
> +/*
> + * Only snap_id is captured here, for reads.  For writes, snapshot
> + * context is captured in rbd_img_object_requests() after exclusive
> + * lock is ensured to be held.
> + */
>   static void rbd_img_capture_header(struct rbd_img_request *img_req)
>   {
>   	struct rbd_device *rbd_dev = img_req->rbd_dev;
>   
>   	lockdep_assert_held(&rbd_dev->header_rwsem);
>   
> -	if (rbd_img_is_write(img_req))
> -		img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
> -	else
> +	if (!rbd_img_is_write(img_req))
>   		img_req->snap_id = rbd_dev->spec->snap_id;
>   
>   	if (rbd_dev_parent_get(rbd_dev))
> @@ -3482,9 +3488,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
>   
>   static void rbd_img_object_requests(struct rbd_img_request *img_req)
>   {
> +	struct rbd_device *rbd_dev = img_req->rbd_dev;
>   	struct rbd_obj_request *obj_req;
>   
>   	rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
> +	rbd_assert(!need_exclusive_lock(img_req) ||
> +		   __rbd_is_lock_owner(rbd_dev));
> +
> +	if (rbd_img_is_write(img_req)) {
> +		rbd_assert(!img_req->snapc);
> +		down_read(&rbd_dev->header_rwsem);
> +		img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
> +		up_read(&rbd_dev->header_rwsem);
> +	}
>   
>   	for_each_obj_request(img_req, obj_req) {
>   		int result = 0;
> @@ -3502,7 +3518,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
>   
>   static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
>   {
> -	struct rbd_device *rbd_dev = img_req->rbd_dev;
>   	int ret;
>   
>   again:
> @@ -3523,9 +3538,6 @@ static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
>   		if (*result)
>   			return true;
>   
> -		rbd_assert(!need_exclusive_lock(img_req) ||
> -			   __rbd_is_lock_owner(rbd_dev));
> -
>   		rbd_img_object_requests(img_req);
>   		if (!img_req->pending.num_pending) {
>   			*result = img_req->pending.result;
> @@ -3987,6 +3999,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
>   {
>   	int ret;
>   
> +	ret = rbd_dev_refresh(rbd_dev);
> +	if (ret)
> +		return ret;
> +
>   	if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
>   		ret = rbd_object_map_open(rbd_dev);
>   		if (ret)
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-06-06  2:43 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-05 20:27 [PATCH 0/2] rbd: avoid fast-diff corruption in snapshot-based mirroring Ilya Dryomov
2023-06-05 20:27 ` [PATCH 1/2] rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting Ilya Dryomov
2023-06-06  2:34   ` Dongsheng Yang
2023-06-05 20:27 ` [PATCH 2/2] rbd: get snapshot context after exclusive lock is ensured to be held Ilya Dryomov
2023-06-06  2:34   ` Dongsheng Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.