* [PATCH v2 00/17] snapshot-merge target
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel

The quilt tree for the snapshot-merge target is maintained here:
http://people.redhat.com/msnitzer/patches/snapshot-merge/kernel/2.6.32/

For userspace LVM2 support please see:
http://people.redhat.com/msnitzer/patches/snapshot-merge/lvm2/LVM2-2.02.54/

Mike Snitzer (4):
  dm snapshot: add suspended flag to dm_snapshot
  dm snapshot: exception handover improvements
  dm exception store: snapshot-merge usage accounting
  dm snapshot: merge a linear region of chunks using one large IO

Mikulas Patocka (13):
  dm snapshot: rework writing to snapshot origin
  dm snapshot: initial support for exception handover
  dm exception store: do not read metadata if going to handover exceptions
  dm exception store: add snapshot-merge specific methods
  dm snapshot: add snapshot-merge target
  dm snapshot: merge target should not allocate new exceptions
  dm snapshot: do not allow more than one merging snapshot.
  dm snapshot: the merge procedure
  dm snapshot: queue writes to an area that is actively being merged
  dm snapshot: do not merge a chunk until active writes to it finish
  dm snapshot: make exceptions in other snapshots when merging
  dm snapshot: make exceptions if merge is dispatching to origin
  dm snapshot: redirect accesses to origin if merging snap invalidated

 drivers/md/dm-exception-store.h |   33 ++-
 drivers/md/dm-snap-persistent.c |   96 ++++++-
 drivers/md/dm-snap-transient.c  |    2 +-
 drivers/md/dm-snap.c            |  687 ++++++++++++++++++++++++++++++++-------
 4 files changed, 689 insertions(+), 129 deletions(-)

* [PATCH v2 01/17] dm snapshot: rework writing to snapshot origin
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

The previous code selected one exception as the "primary_pe", linked all
other exceptions to it and used reference counting to wait until all the
exceptions were reallocated.  This did not work with snapshots that have
different chunk sizes:
https://bugzilla.redhat.com/show_bug.cgi?id=182659

I removed all the complexity of exception linking and reference
counting.  Now the bio is linked to a single exception; when that
exception is reallocated, the bio is retried so it can wait for any
other exceptions that are still outstanding.
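
To illustrate the new flow, here is a minimal userspace model (the
snapshot count, chunk sizes and helper names are made up for this
example; in the kernel the copy is asynchronous kcopyd I/O and the
retry happens from pending_complete() via retry_origin_bios()):

/*
 * Minimal userspace model of the reworked origin-write path.  The copy
 * "completes" immediately here, so the retry loop in main() stands in
 * for the deferred retry done by retry_origin_bios().
 */
#include <stdio.h>
#include <stdbool.h>

#define NSNAPS     2
#define MAX_CHUNKS 64

struct snap {
	unsigned chunk_sectors;		/* snapshots may differ here */
	bool copied[MAX_CHUNKS];	/* chunk already has an exception */
};

static struct snap snaps[NSNAPS] = {
	{ .chunk_sectors = 8 },
	{ .chunk_sectors = 16 },
};

/* Returns true if the bio may go to the origin, false if it was queued. */
static bool origin_write(unsigned sector)
{
	for (int i = 0; i < NSNAPS; i++) {
		unsigned chunk = sector / snaps[i].chunk_sectors;

		if (snaps[i].copied[chunk])
			continue;

		/* Queue the bio on this one pending exception only. */
		printf("sector %u waits for snapshot %d, chunk %u\n",
		       sector, i, chunk);
		snaps[i].copied[chunk] = true;	/* the copy "completes" */
		return false;			/* DM_MAPIO_SUBMITTED */
	}
	return true;				/* DM_MAPIO_REMAPPED */
}

int main(void)
{
	unsigned sector = 20;

	/* Like retry_origin_bios(): resubmit after each completion. */
	while (!origin_write(sector))
		;
	printf("sector %u now goes to the origin\n", sector);
	return 0;
}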

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |  165 +++++++++++++++++--------------------------------
 1 files changed, 57 insertions(+), 108 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9e660ce..6181878 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -134,28 +134,6 @@ struct dm_snap_pending_exception {
 	struct bio_list origin_bios;
 	struct bio_list snapshot_bios;
 
-	/*
-	 * Short-term queue of pending exceptions prior to submission.
-	 */
-	struct list_head list;
-
-	/*
-	 * The primary pending_exception is the one that holds
-	 * the ref_count and the list of origin_bios for a
-	 * group of pending_exceptions.  It is always last to get freed.
-	 * These fields get set up when writing to the origin.
-	 */
-	struct dm_snap_pending_exception *primary_pe;
-
-	/*
-	 * Number of pending_exceptions processing this chunk.
-	 * When this drops to zero we must complete the origin bios.
-	 * If incrementing or decrementing this, hold pe->snap->lock for
-	 * the sibling concerned and not pe->primary_pe->snap->lock unless
-	 * they are the same.
-	 */
-	atomic_t ref_count;
-
 	/* Pointer back to snapshot context */
 	struct dm_snapshot *snap;
 
@@ -831,6 +809,28 @@ static void flush_queued_bios(struct work_struct *work)
 	flush_bios(queued_bios);
 }
 
+static int do_origin(struct dm_dev *origin, struct bio *bio);
+
+/*
+ * Retry a list of origin bios.
+ */
+static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
+{
+	struct bio *n;
+	int r;
+
+	while (bio) {
+		n = bio->bi_next;
+		bio->bi_next = NULL;
+		r = do_origin(s->origin, bio);
+		if (r == DM_MAPIO_REMAPPED)
+			generic_make_request(bio);
+		else
+			BUG_ON(r != DM_MAPIO_SUBMITTED);
+		bio = n;
+	}
+}
+
 /*
  * Error a list of buffers.
  */
@@ -864,39 +864,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
 	dm_table_event(s->ti->table);
 }
 
-static void get_pending_exception(struct dm_snap_pending_exception *pe)
-{
-	atomic_inc(&pe->ref_count);
-}
-
-static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
-{
-	struct dm_snap_pending_exception *primary_pe;
-	struct bio *origin_bios = NULL;
-
-	primary_pe = pe->primary_pe;
-
-	/*
-	 * If this pe is involved in a write to the origin and
-	 * it is the last sibling to complete then release
-	 * the bios for the original write to the origin.
-	 */
-	if (primary_pe &&
-	    atomic_dec_and_test(&primary_pe->ref_count)) {
-		origin_bios = bio_list_get(&primary_pe->origin_bios);
-		free_pending_exception(primary_pe);
-	}
-
-	/*
-	 * Free the pe if it's not linked to an origin write or if
-	 * it's not itself a primary pe.
-	 */
-	if (!primary_pe || primary_pe != pe)
-		free_pending_exception(pe);
-
-	return origin_bios;
-}
-
 static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 {
 	struct dm_exception *e;
@@ -945,7 +912,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
  out:
 	dm_remove_exception(&pe->e);
 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
-	origin_bios = put_pending_exception(pe);
+	origin_bios = bio_list_get(&pe->origin_bios);
+	free_pending_exception(pe);
 
 	up_write(&s->lock);
 
@@ -955,7 +923,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 	else
 		flush_bios(snapshot_bios);
 
-	flush_bios(origin_bios);
+	retry_origin_bios(s, origin_bios);
 }
 
 static void commit_callback(void *context, int success)
@@ -1042,8 +1010,6 @@ __find_pending_exception(struct dm_snapshot *s,
 	pe->e.old_chunk = chunk;
 	bio_list_init(&pe->origin_bios);
 	bio_list_init(&pe->snapshot_bios);
-	pe->primary_pe = NULL;
-	atomic_set(&pe->ref_count, 0);
 	pe->started = 0;
 
 	if (s->store->type->prepare_exception(s->store, &pe->e)) {
@@ -1051,7 +1017,6 @@ __find_pending_exception(struct dm_snapshot *s,
 		return NULL;
 	}
 
-	get_pending_exception(pe);
 	dm_insert_exception(&s->pending, &pe->e);
 
 	return pe;
@@ -1241,14 +1206,21 @@ static int snapshot_iterate_devices(struct dm_target *ti,
 /*-----------------------------------------------------------------
  * Origin methods
  *---------------------------------------------------------------*/
-static int __origin_write(struct list_head *snapshots, struct bio *bio)
+
+/*
+ * Returns:
+ *	DM_MAPIO_REMAPPED: bio may be submitted to origin device
+ *	DM_MAPIO_SUBMITTED: bio was queued on one of the pending exceptions
+ */
+
+static int __origin_write(struct list_head *snapshots,
+			  sector_t sector, struct bio *bio)
 {
-	int r = DM_MAPIO_REMAPPED, first = 0;
+	int r = DM_MAPIO_REMAPPED;
 	struct dm_snapshot *snap;
 	struct dm_exception *e;
-	struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
+	struct dm_snap_pending_exception *pe, *pe_to_start = NULL;
 	chunk_t chunk;
-	LIST_HEAD(pe_queue);
 
 	/* Do all the snapshots on this origin */
 	list_for_each_entry (snap, snapshots, list) {
@@ -1260,22 +1232,19 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 			goto next_snapshot;
 
 		/* Nothing to do if writing beyond end of snapshot */
-		if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+		if (sector >= dm_table_get_size(snap->ti->table))
 			goto next_snapshot;
 
 		/*
 		 * Remember, different snapshots can have
 		 * different chunk sizes.
 		 */
-		chunk = sector_to_chunk(snap->store, bio->bi_sector);
+		chunk = sector_to_chunk(snap->store, sector);
 
 		/*
 		 * Check exception table to see if block
 		 * is already remapped in this snapshot
 		 * and trigger an exception if not.
-		 *
-		 * ref_count is initialised to 1 so pending_complete()
-		 * won't destroy the primary_pe while we're inside this loop.
 		 */
 		e = dm_lookup_exception(&snap->complete, chunk);
 		if (e)
@@ -1305,59 +1274,39 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
 			}
 		}
 
-		if (!primary_pe) {
-			/*
-			 * Either every pe here has same
-			 * primary_pe or none has one yet.
-			 */
-			if (pe->primary_pe)
-				primary_pe = pe->primary_pe;
-			else {
-				primary_pe = pe;
-				first = 1;
-			}
-
-			bio_list_add(&primary_pe->origin_bios, bio);
+		r = DM_MAPIO_SUBMITTED;
 
-			r = DM_MAPIO_SUBMITTED;
-		}
+		if (bio) {
+			bio_list_add(&pe->origin_bios, bio);
+			bio = NULL;
 
-		if (!pe->primary_pe) {
-			pe->primary_pe = primary_pe;
-			get_pending_exception(primary_pe);
+			if (!pe->started) {
+				pe->started = 1;
+				pe_to_start = pe;
+			}
 		}
 
 		if (!pe->started) {
 			pe->started = 1;
-			list_add_tail(&pe->list, &pe_queue);
+			start_copy(pe);
 		}
 
  next_snapshot:
 		up_write(&snap->lock);
 	}
 
-	if (!primary_pe)
-		return r;
-
 	/*
-	 * If this is the first time we're processing this chunk and
-	 * ref_count is now 1 it means all the pending exceptions
-	 * got completed while we were in the loop above, so it falls to
-	 * us here to remove the primary_pe and submit any origin_bios.
+	 * pe_to_start is a small performance improvement:
+	 * To avoid calling __origin_write N times for N snapshots, we start
+	 * the snapshot where we queued the bio as the last one.
+	 *
+	 * If we start it as the last one, it finishes most likely as the last
+	 * one and exceptions in other snapshots will be already finished when
+	 * the bio will be retried.
 	 */
 
-	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
-		flush_bios(bio_list_get(&primary_pe->origin_bios));
-		free_pending_exception(primary_pe);
-		/* If we got here, pe_queue is necessarily empty. */
-		return r;
-	}
-
-	/*
-	 * Now that we have a complete pe list we can start the copying.
-	 */
-	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
-		start_copy(pe);
+	if (pe_to_start)
+		start_copy(pe_to_start);
 
 	return r;
 }
@@ -1373,7 +1322,7 @@ static int do_origin(struct dm_dev *origin, struct bio *bio)
 	down_read(&_origins_lock);
 	o = __lookup_origin(origin->bdev);
 	if (o)
-		r = __origin_write(&o->snapshots, bio);
+		r = __origin_write(&o->snapshots, bio->bi_sector, bio);
 	up_read(&_origins_lock);
 
 	return r;
-- 
1.6.5.rc2

* [PATCH v2 02/17] dm snapshot: add suspended flag to dm_snapshot
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel

Allow the snapshot target to verify its state relative to a requested
operation, so that invalid operations can be prevented, e.g. an attempt
to hand over an "old" snapshot's exceptions before it has been
suspended.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   17 ++++++++++++++++-
 1 files changed, 16 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 6181878..be37439 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -72,6 +72,9 @@ struct dm_snapshot {
 	/* Origin writes don't trigger exceptions until this is set */
 	int active;
 
+	/* Set if the parent device is suspended */
+	int suspended;
+
 	mempool_t *pending_pool;
 
 	atomic_t pending_exceptions_count;
@@ -632,6 +635,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->ti = ti;
 	s->valid = 1;
 	s->active = 0;
+	s->suspended = 0;
 	atomic_set(&s->pending_exceptions_count, 0);
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
@@ -1138,12 +1142,22 @@ static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
 	return 0;
 }
 
+static void snapshot_presuspend(struct dm_target *ti)
+{
+	struct dm_snapshot *s = ti->private;
+
+	down_write(&s->lock);
+	s->suspended = 1;
+	up_write(&s->lock);
+}
+
 static void snapshot_resume(struct dm_target *ti)
 {
 	struct dm_snapshot *s = ti->private;
 
 	down_write(&s->lock);
 	s->active = 1;
+	s->suspended = 0;
 	up_write(&s->lock);
 }
 
@@ -1434,12 +1448,13 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
 	.name    = "snapshot",
-	.version = {1, 8, 0},
+	.version = {1, 9, 0},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
 	.map     = snapshot_map,
 	.end_io  = snapshot_end_io,
+	.presuspend = snapshot_presuspend,
 	.resume  = snapshot_resume,
 	.status  = snapshot_status,
 	.iterate_devices = snapshot_iterate_devices,
-- 
1.6.5.rc2

* [PATCH v2 03/17] dm snapshot: initial support for exception handover
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

Add handover of the exception store between two snapshot instances that
share the same COW device.  This is needed for merging (to allow the
table to be reloaded) and it also enables origin or snapshot resizing.

The supported call sequences are:

new_snapshot->ctr
old_snapshot->suspend
old_snapshot->dtr
new_snapshot->resume

and

new_snapshot->ctr
old_snapshot->suspend
new_snapshot->resume
old_snapshot->dtr

There may not be more than two instances of a given snapshot.
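
A small userspace model of the two sequences (illustrative only; the
mutual 'peer' pointer is a simplification of what __find_duplicate()
and the later handover_snap cross-reference provide):

/*
 * Userspace model of the two supported call sequences.  The exception
 * store is reduced to a single string and handover() stands in for
 * handover_exceptions().
 */
#include <stdio.h>

struct snap {
	const char *store;	/* stands in for the exception store */
	struct snap *peer;	/* the other snapshot sharing the COW */
	int handover;		/* set on the new snapshot until done */
};

static void handover(struct snap *old, struct snap *new)
{
	new->store = old->store;
	old->store = NULL;
	new->handover = 0;
	old->peer = new->peer = NULL;
}

static void dtr(struct snap *old)
{
	if (old->peer && old->peer->handover)	/* old destroyed first */
		handover(old, old->peer);
}

static void resume(struct snap *new)
{
	if (new->handover)			/* new resumed first */
		handover(new->peer, new);
}

static void run(int dtr_before_resume)
{
	struct snap old = { .store = "cow exceptions" };
	struct snap new = { .handover = 1 };

	old.peer = &new;
	new.peer = &old;
	/* new->ctr and old->suspend have already happened here */
	if (dtr_before_resume) {
		dtr(&old);
		resume(&new);
	} else {
		resume(&new);
		dtr(&old);
	}
	printf("new snapshot now owns: %s\n", new.store);
}

int main(void)
{
	run(1);		/* ctr, suspend, dtr, resume */
	run(0);		/* ctr, suspend, resume, dtr */
	return 0;
}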

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
---
 drivers/md/dm-snap.c |   85 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 85 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index be37439..b42c82e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -75,6 +75,9 @@ struct dm_snapshot {
 	/* Set if the parent device is suspended */
 	int suspended;
 
+	/* Exception store will be handed over from another snapshot */
+	int handover;
+
 	mempool_t *pending_pool;
 
 	atomic_t pending_exceptions_count;
@@ -506,6 +509,33 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
 	return 0;
 }
 
+/* _origins_lock must be held */
+static struct dm_snapshot *__find_duplicate(struct dm_snapshot *snap)
+{
+	struct dm_snapshot *dup;
+	struct dm_snapshot *l;
+	struct origin *o;
+
+	o = __lookup_origin(snap->origin->bdev);
+	if (!o)
+		return NULL;
+
+	dup = NULL;
+	list_for_each_entry(l, &o->snapshots, list)
+		if (l != snap && bdev_equal(l->cow->bdev,
+					    snap->cow->bdev)) {
+			if (!dup) {
+				dup = l;
+			} else {
+				DMERR("Multiple active duplicates, "
+				      "user misuses dmsetup.");
+				return NULL;
+			}
+		}
+
+	return dup;
+}
+
 #define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
 
 /*
@@ -637,6 +667,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->active = 0;
 	s->suspended = 0;
 	atomic_set(&s->pending_exceptions_count, 0);
+	s->handover = 0;
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
 
@@ -672,6 +703,11 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	spin_lock_init(&s->tracked_chunk_lock);
 
+	down_read(&_origins_lock);
+	if (__find_duplicate(s))
+		s->handover = 1;
+	up_read(&_origins_lock);
+
 	/* Metadata must only be loaded into one table at once */
 	r = s->store->type->read_metadata(s->store, dm_add_exception,
 					  (void *)s);
@@ -743,15 +779,49 @@ static void __free_exceptions(struct dm_snapshot *s)
 	dm_exception_table_exit(&s->complete, exception_cache);
 }
 
+static void handover_exceptions(struct dm_snapshot *old,
+				struct dm_snapshot *new)
+{
+	union {
+		struct dm_exception_table table_swap;
+		struct dm_exception_store *store_swap;
+	} u;
+
+	u.table_swap = new->complete;
+	new->complete = old->complete;
+	old->complete = u.table_swap;
+	u.store_swap = new->store;
+	new->store = old->store;
+	old->store = u.store_swap;
+
+	new->store->snap = new;
+	old->store->snap = old;
+
+	old->active = 0;
+	new->handover = 0;
+}
+
 static void snapshot_dtr(struct dm_target *ti)
 {
 #ifdef CONFIG_DM_DEBUG
 	int i;
 #endif
 	struct dm_snapshot *s = ti->private;
+	struct dm_snapshot *dup;
 
 	flush_workqueue(ksnapd);
 
+	down_write(&_origins_lock);
+	down_write(&s->lock);
+	dup = __find_duplicate(s);
+	if (dup && dup->handover) {
+		down_write_nested(&dup->lock, SINGLE_DEPTH_NESTING);
+		handover_exceptions(s, dup);
+		up_write(&dup->lock);
+	}
+	up_write(&s->lock);
+	up_write(&_origins_lock);
+
 	/* Prevent further origin writes from using this snapshot. */
 	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
@@ -1155,10 +1225,25 @@ static void snapshot_resume(struct dm_target *ti)
 {
 	struct dm_snapshot *s = ti->private;
 
+	down_write(&_origins_lock);
 	down_write(&s->lock);
+	if (s->handover) {
+		struct dm_snapshot *dup;
+		dup = __find_duplicate(s);
+		if (!dup) {
+			DMERR("duplicate not found");
+			s->valid = 0;
+			goto ret;
+		}
+		down_write_nested(&dup->lock, SINGLE_DEPTH_NESTING);
+		handover_exceptions(dup, s);
+		up_write(&dup->lock);
+	}
 	s->active = 1;
 	s->suspended = 0;
+ ret:
 	up_write(&s->lock);
+	up_write(&_origins_lock);
 }
 
 static int snapshot_status(struct dm_target *ti, status_type_t type,
-- 
1.6.5.rc2

* [PATCH v2 04/17] dm snapshot: exception handover improvements
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka, Alasdair G Kergon

Simplify exception handover so that the duplicate snapshot is found in
one place (snapshot_ctr) and recorded in both of the snapshots (old and
new) that will participate in the handover.

This confines handover to a specific pair of snapshots and avoids
additional __find_duplicate() calls.

It also documents the exception handover state transitions with
relevant comments in the associated code.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Jonathan Brassow <jbrassow@redhat.com>
---
 drivers/md/dm-snap.c |  100 +++++++++++++++++++++++++++++++++----------------
 1 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index b42c82e..5414e66 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -75,9 +75,26 @@ struct dm_snapshot {
 	/* Set if the parent device is suspended */
 	int suspended;
 
-	/* Exception store will be handed over from another snapshot */
+	/*
+	 * 'handover' denotes exception store will be handed over from
+	 * another snapshot with the same cow block device (as identified
+	 * with __find_duplicate).
+	 *
+	 * 'handover' is set during a new snapshot's constructor if it finds
+	 * a single old snapshot is using the same cow block device as it.
+	 * Handover operation is performed, and 'handover' is cleared,
+	 * when either of the following occurs:
+	 * - old snapshot, that is handing over, is destructed
+	 * - new snapshot, that is accepting the handover, is resumed
+	 */
 	int handover;
 
+	/*
+	 * reference to the other snapshot that will participate in the
+	 * exception store handover; new references old, old references new
+	 */
+	struct dm_snapshot *handover_snap;
+
 	mempool_t *pending_pool;
 
 	atomic_t pending_exceptions_count;
@@ -510,7 +527,8 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
 }
 
 /* _origins_lock must be held */
-static struct dm_snapshot *__find_duplicate(struct dm_snapshot *snap)
+static struct dm_snapshot *__find_duplicate(struct dm_snapshot *snap,
+					    int *duplicate_count)
 {
 	struct dm_snapshot *dup;
 	struct dm_snapshot *l;
@@ -521,16 +539,13 @@ static struct dm_snapshot *__find_duplicate(struct dm_snapshot *snap)
 		return NULL;
 
 	dup = NULL;
+	*duplicate_count = 0;
 	list_for_each_entry(l, &o->snapshots, list)
 		if (l != snap && bdev_equal(l->cow->bdev,
 					    snap->cow->bdev)) {
-			if (!dup) {
+			if (!dup)
 				dup = l;
-			} else {
-				DMERR("Multiple active duplicates, "
-				      "user misuses dmsetup.");
-				return NULL;
-			}
+			(*duplicate_count)++;
 		}
 
 	return dup;
@@ -611,8 +626,8 @@ static int init_hash_tables(struct dm_snapshot *s)
  */
 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
-	struct dm_snapshot *s;
-	int i;
+	struct dm_snapshot *s, *dup;
+	int i, duplicate_count = 0;
 	int r = -EINVAL;
 	char *origin_path, *cow_path;
 	unsigned args_used;
@@ -668,6 +683,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->suspended = 0;
 	atomic_set(&s->pending_exceptions_count, 0);
 	s->handover = 0;
+	s->handover_snap = NULL;
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
 
@@ -703,10 +719,23 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	spin_lock_init(&s->tracked_chunk_lock);
 
+	/* Snapshot may need to have exceptions handed over to it */
+	r = -EINVAL;
 	down_read(&_origins_lock);
-	if (__find_duplicate(s))
-		s->handover = 1;
+	dup = __find_duplicate(s, &duplicate_count);
 	up_read(&_origins_lock);
+	if (dup) {
+		if (duplicate_count > 1) {
+			ti->error = "More than one other snapshot has been "
+				    "constructed with the same cow device.";
+			goto bad_load_and_register;
+		}
+		/* cross reference snapshots that will do handover */
+		dup->handover_snap = s;
+		s->handover_snap = dup;
+		/* this new snapshot will accept the handover */
+		s->handover = 1;
+	}
 
 	/* Metadata must only be loaded into one table at once */
 	r = s->store->type->read_metadata(s->store, dm_add_exception,
@@ -787,6 +816,11 @@ static void handover_exceptions(struct dm_snapshot *old,
 		struct dm_exception_store *store_swap;
 	} u;
 
+	BUG_ON((old->handover_snap != new) ||
+	       (new->handover_snap != old));
+	BUG_ON((old->handover != 0) || (new->handover != 1));
+	BUG_ON(!old->suspended);
+
 	u.table_swap = new->complete;
 	new->complete = old->complete;
 	old->complete = u.table_swap;
@@ -797,7 +831,14 @@ static void handover_exceptions(struct dm_snapshot *old,
 	new->store->snap = new;
 	old->store->snap = old;
 
+	/* Mark old snapshot invalid and inactive */
+	old->valid = 0;
 	old->active = 0;
+
+	/* Reset handover_snap references */
+	old->handover_snap = NULL;
+	new->handover_snap = NULL;
+
 	new->handover = 0;
 }
 
@@ -807,20 +848,18 @@ static void snapshot_dtr(struct dm_target *ti)
 	int i;
 #endif
 	struct dm_snapshot *s = ti->private;
-	struct dm_snapshot *dup;
 
 	flush_workqueue(ksnapd);
 
-	down_write(&_origins_lock);
+	/* This snapshot may need to handover its exception store */
 	down_write(&s->lock);
-	dup = __find_duplicate(s);
-	if (dup && dup->handover) {
-		down_write_nested(&dup->lock, SINGLE_DEPTH_NESTING);
-		handover_exceptions(s, dup);
-		up_write(&dup->lock);
+	if (s->handover_snap) {
+		struct dm_snapshot *new_snap = s->handover_snap;
+		down_write_nested(&new_snap->lock, SINGLE_DEPTH_NESTING);
+		handover_exceptions(s, new_snap);
+		up_write(&new_snap->lock);
 	}
 	up_write(&s->lock);
-	up_write(&_origins_lock);
 
 	/* Prevent further origin writes from using this snapshot. */
 	/* After this returns there can be no new kcopyd jobs. */
@@ -1225,25 +1264,20 @@ static void snapshot_resume(struct dm_target *ti)
 {
 	struct dm_snapshot *s = ti->private;
 
-	down_write(&_origins_lock);
 	down_write(&s->lock);
 	if (s->handover) {
-		struct dm_snapshot *dup;
-		dup = __find_duplicate(s);
-		if (!dup) {
-			DMERR("duplicate not found");
-			s->valid = 0;
-			goto ret;
-		}
-		down_write_nested(&dup->lock, SINGLE_DEPTH_NESTING);
-		handover_exceptions(dup, s);
-		up_write(&dup->lock);
+		/* Get exception store from another snapshot */
+		struct dm_snapshot *old_snap = s->handover_snap;
+		BUG_ON(!old_snap);
+		down_write_nested(&old_snap->lock, SINGLE_DEPTH_NESTING);
+		handover_exceptions(old_snap, s);
+		up_write(&old_snap->lock);
 	}
+	/* An incomplete exception handover is not allowed */
+	BUG_ON(s->handover || s->handover_snap);
 	s->active = 1;
 	s->suspended = 0;
- ret:
 	up_write(&s->lock);
-	up_write(&_origins_lock);
 }
 
 static int snapshot_status(struct dm_target *ti, status_type_t type,
-- 
1.6.5.rc2

* [PATCH v2 05/17] dm exception store: do not read metadata if going to handover exceptions
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

This saves memory: without this patch, the exception store would
consume twice as much memory while doing the handover.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
---
 drivers/md/dm-exception-store.h |    6 +++++-
 drivers/md/dm-snap-persistent.c |    5 +++--
 drivers/md/dm-snap-transient.c  |    2 +-
 drivers/md/dm-snap.c            |    2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index bb88746..a96a40a 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -54,11 +54,15 @@ struct dm_exception_store_type {
 	 * The target shouldn't read the COW device until this is
 	 * called.  As exceptions are read from the COW, they are
 	 * reported back via the callback.
+	 *
+	 * will_handover means that there is another snapshot active;
+	 * chunk size must be setup but no exceptions need to be read
+	 * because they will be handed over from the active snapshot.
 	 */
 	int (*read_metadata) (struct dm_exception_store *store,
 			      int (*callback)(void *callback_context,
 					      chunk_t old, chunk_t new),
-			      void *callback_context);
+			      void *callback_context, int will_handover);
 
 	/*
 	 * Find somewhere to store the next exception.
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 157999e..e0f526a 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -529,7 +529,7 @@ static void persistent_dtr(struct dm_exception_store *store)
 static int persistent_read_metadata(struct dm_exception_store *store,
 				    int (*callback)(void *callback_context,
 						    chunk_t old, chunk_t new),
-				    void *callback_context)
+				    void *callback_context, int will_handover)
 {
 	int r, uninitialized_var(new_snapshot);
 	struct pstore *ps = get_info(store);
@@ -586,7 +586,8 @@ static int persistent_read_metadata(struct dm_exception_store *store,
 	/*
 	 * Read the metadata.
 	 */
-	r = read_exceptions(ps, callback, callback_context);
+	if (!will_handover)
+		r = read_exceptions(ps, callback, callback_context);
 
 	return r;
 }
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index a0898a6..341231a 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -30,7 +30,7 @@ static void transient_dtr(struct dm_exception_store *store)
 static int transient_read_metadata(struct dm_exception_store *store,
 				   int (*callback)(void *callback_context,
 						   chunk_t old, chunk_t new),
-				   void *callback_context)
+				   void *callback_context, int will_handover)
 {
 	return 0;
 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5414e66..1f0a57c 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -739,7 +739,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	/* Metadata must only be loaded into one table at once */
 	r = s->store->type->read_metadata(s->store, dm_add_exception,
-					  (void *)s);
+					  (void *)s, s->handover);
 	if (r < 0) {
 		ti->error = "Failed to read snapshot metadata";
 		goto bad_load_and_register;
-- 
1.6.5.rc2

* [PATCH v2 06/17] dm exception store: add snapshot-merge specific methods
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

prepare_merge: returns the last committed chunk in the variables passed
	by reference.  The return value is the number of consecutive
	chunks ending there.
commit_merge: permanently removes 'n' chunks from the exception store.
	'n' must be less than or equal to the number returned by
	prepare_merge.

As an optimization, the caller may merge several consecutive chunks at
once.  A caller that does not want this optimization simply calls
commit_merge with n == 1.
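
A toy userspace model of this contract (the array-backed store and the
example numbers are made up; the real store reads the exceptions back
from on-disk metadata areas):

#include <stdio.h>

typedef unsigned long long chunk_t;

struct toy_store {
	struct { chunk_t old_chunk, new_chunk; } e[16];
	int committed;			/* number of valid entries */
};

/* Returns the last exception and how many consecutive ones end there. */
static int prepare_merge(struct toy_store *s, chunk_t *old_chunk,
			 chunk_t *new_chunk)
{
	int i;

	if (!s->committed)
		return 0;		/* store is empty */

	*old_chunk = s->e[s->committed - 1].old_chunk;
	*new_chunk = s->e[s->committed - 1].new_chunk;

	for (i = 1; i < s->committed; i++)
		if (s->e[s->committed - 1 - i].old_chunk != *old_chunk - i ||
		    s->e[s->committed - 1 - i].new_chunk != *new_chunk - i)
			break;
	return i;
}

/* Permanently forgets the last n exceptions; n <= prepare_merge()'s result. */
static void commit_merge(struct toy_store *s, int n)
{
	s->committed -= n;
}

int main(void)
{
	struct toy_store s = {
		.e = { {10, 3}, {20, 4}, {21, 5}, {22, 6} },
		.committed = 4,
	};
	chunk_t oc, nc;
	int n;

	while ((n = prepare_merge(&s, &oc, &nc)) > 0) {
		printf("merge %d chunk(s) ending at old %llu / new %llu\n",
		       n, oc, nc);
		/* a caller not using the optimization would pass n = 1 */
		commit_merge(&s, n);
	}
	return 0;
}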

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-exception-store.h |   16 +++++++++
 drivers/md/dm-snap-persistent.c |   70 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index a96a40a..9cdc08b 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -79,6 +79,22 @@ struct dm_exception_store_type {
 				  void *callback_context);
 
 	/*
+	 * Returns the last committed chunk in *old_chunk / *new_chunk.
+	 * > 0:  the number of consecutive chunks that can
+	 *       be copied in one shot.
+	 * == 0: the exception store is empty.
+	 * < 0:  error.
+	 */
+	int (*prepare_merge) (struct dm_exception_store *store,
+			      chunk_t *old_chunk, chunk_t *new_chunk);
+
+	/*
+	 * Clear the last n exceptions.
+	 * n must be <= the value returned by prepare_merge.
+	 */
+	int (*commit_merge) (struct dm_exception_store *store, int n);
+
+	/*
 	 * The snapshot is invalid, note this in the metadata.
 	 */
 	void (*drop_snapshot) (struct dm_exception_store *store);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index e0f526a..4015923 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -409,6 +409,15 @@ static void write_exception(struct pstore *ps,
 	e->new_chunk = cpu_to_le64(de->new_chunk);
 }
 
+static void clear_exception(struct pstore *ps, uint32_t index)
+{
+	struct disk_exception *e = get_exception(ps, index);
+
+	/* clear it */
+	e->old_chunk = 0;
+	e->new_chunk = 0;
+}
+
 /*
  * Registers the exceptions that are present in the current area.
  * 'full' is filled in to indicate if the area has been
@@ -681,6 +690,63 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 	ps->callback_count = 0;
 }
 
+static int persistent_prepare_merge(struct dm_exception_store *store,
+				    chunk_t *old_chunk, chunk_t *new_chunk)
+{
+	int r, i;
+	struct pstore *ps = get_info(store);
+	struct disk_exception de;
+
+	if (!ps->current_committed) {
+		if (!ps->current_area)
+			return 0;
+		ps->current_area--;
+		r = area_io(ps, READ);
+		if (r < 0)
+			return r;
+		ps->current_committed = ps->exceptions_per_area;
+	}
+
+	read_exception(ps, ps->current_committed - 1, &de);
+	*old_chunk = de.old_chunk;
+	*new_chunk = de.new_chunk;
+
+	for (i = 1; i < ps->current_committed; i++) {
+		read_exception(ps, ps->current_committed - 1 - i, &de);
+		if (de.old_chunk != *old_chunk - i ||
+		    de.new_chunk != *new_chunk - i)
+			break;
+	}
+
+	return i;
+}
+
+static int persistent_commit_merge(struct dm_exception_store *store, int n)
+{
+	int r, i;
+	struct pstore *ps = get_info(store);
+
+	BUG_ON(n > ps->current_committed);
+
+	for (i = 0; i < n; i++)
+		clear_exception(ps, ps->current_committed - 1 - i);
+
+	r = area_io(ps, WRITE);
+	if (r < 0)
+		return r;
+
+	ps->current_committed -= i;
+
+	/*
+	 * ps->next_free cannot really be reliably decreased here (because of
+	 * misordered chunks), so don't do it. We don't even need it, because
+	 * there is no situation where merging snapshot would become
+	 * non-merging.
+	 */
+
+	return 0;
+}
+
 static void persistent_drop_snapshot(struct dm_exception_store *store)
 {
 	struct pstore *ps = get_info(store);
@@ -749,6 +815,8 @@ static struct dm_exception_store_type _persistent_type = {
 	.read_metadata = persistent_read_metadata,
 	.prepare_exception = persistent_prepare_exception,
 	.commit_exception = persistent_commit_exception,
+	.prepare_merge = persistent_prepare_merge,
+	.commit_merge = persistent_commit_merge,
 	.drop_snapshot = persistent_drop_snapshot,
 	.usage = persistent_usage,
 	.status = persistent_status,
@@ -762,6 +830,8 @@ static struct dm_exception_store_type _persistent_compat_type = {
 	.read_metadata = persistent_read_metadata,
 	.prepare_exception = persistent_prepare_exception,
 	.commit_exception = persistent_commit_exception,
+	.prepare_merge = persistent_prepare_merge,
+	.commit_merge = persistent_commit_merge,
 	.drop_snapshot = persistent_drop_snapshot,
 	.usage = persistent_usage,
 	.status = persistent_status,
-- 
1.6.5.rc2

* [PATCH v2 07/17] dm exception store: snapshot-merge usage accounting
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

Conditionally adjust snapshot usage accounting if snapshot-merge is in
progress.  Care is taken to preserve the established kernel<->userspace
interface.
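
A worked example of the merging-case arithmetic, with made-up numbers
and assuming the usual persistent-store layout (a single header chunk,
then one metadata chunk followed by exceptions_per_area data chunks per
area, which is what area_location() computes):

#include <stdio.h>

int main(void)
{
	unsigned long long exceptions_per_area = 256;
	unsigned long long current_area = 2;	/* area being merged back */
	unsigned long long current_committed = 100;
	unsigned long long chunk_size = 16;	/* sectors per chunk */

	/* chunk index of the current area's metadata chunk */
	unsigned long long area_location =
		1 + (exceptions_per_area + 1) * current_area;	/* 515 */

	/* the "+ 2" keeps the first two metadata chunks in the count,
	 * matching the comment added by this patch */
	unsigned long long sectors_allocated =
		(area_location - 1 + current_committed + 2) * chunk_size;

	printf("%llu sectors still allocated\n", sectors_allocated); /* 9856 */
	return 0;
}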

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-snap-persistent.c |   21 ++++++++++++++++++++-
 1 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 4015923..2cac778 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -91,6 +91,7 @@ struct pstore {
 	struct dm_exception_store *store;
 	int version;
 	int valid;
+	int merging;	/* 1 if there is merging going on */
 	uint32_t exceptions_per_area;
 
 	/*
@@ -506,7 +507,20 @@ static void persistent_usage(struct dm_exception_store *store,
 {
 	struct pstore *ps = get_info(store);
 
-	*sectors_allocated = ps->next_free * store->chunk_size;
+	/*
+	 * Must maintain the fact that DM reports all metadata chunks
+	 * in 'sectors_allocated'
+	 * - preserves the established kernel<->userspace interface
+	 * - snapshot-merge must account for the first two metadata
+	 *   chunks in its 'sectors_allocated'
+	 */
+	if (!ps->merging) {
+		*sectors_allocated = ps->next_free * store->chunk_size;
+	} else {
+		*sectors_allocated =
+			(area_location(ps, ps->current_area) - 1 +
+			 ps->current_committed + 2) * store->chunk_size;
+	}
 	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
 
 	/*
@@ -609,6 +623,8 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
 	chunk_t next_free;
 	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
 
+	ps->merging = 0;
+
 	/* Is there enough room ? */
 	if (size < ((ps->next_free + 1) * store->chunk_size))
 		return -ENOSPC;
@@ -697,6 +713,8 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
 	struct pstore *ps = get_info(store);
 	struct disk_exception de;
 
+	ps->merging = 1;
+
 	if (!ps->current_committed) {
 		if (!ps->current_area)
 			return 0;
@@ -768,6 +786,7 @@ static int persistent_ctr(struct dm_exception_store *store,
 
 	ps->store = store;
 	ps->valid = 1;
+	ps->merging = 0;
 	ps->version = SNAPSHOT_DISK_VERSION;
 	ps->area = NULL;
 	ps->zero_area = NULL;
-- 
1.6.5.rc2

* [PATCH v2 08/17] dm snapshot: add snapshot-merge target
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

For starters, snapshot-merge is identical to the snapshot target.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   46 ++++++++++++++++++++++++++++++++++------------
 1 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 1f0a57c..d83a26e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1579,6 +1579,19 @@ static struct target_type snapshot_target = {
 	.iterate_devices = snapshot_iterate_devices,
 };
 
+static struct target_type merge_target = {
+	.name    = "snapshot-merge",
+	.version = {1, 9, 0},
+	.module  = THIS_MODULE,
+	.ctr     = snapshot_ctr,
+	.dtr     = snapshot_dtr,
+	.map     = snapshot_map,
+	.end_io  = snapshot_end_io,
+	.resume  = snapshot_resume,
+	.status  = snapshot_status,
+	.iterate_devices = snapshot_iterate_devices,
+};
+
 static int __init dm_snapshot_init(void)
 {
 	int r;
@@ -1590,7 +1603,7 @@ static int __init dm_snapshot_init(void)
 	}
 
 	r = dm_register_target(&snapshot_target);
-	if (r) {
+	if (r < 0) {
 		DMERR("snapshot target register failed %d", r);
 		goto bad_register_snapshot_target;
 	}
@@ -1598,34 +1611,40 @@ static int __init dm_snapshot_init(void)
 	r = dm_register_target(&origin_target);
 	if (r < 0) {
 		DMERR("Origin target register failed %d", r);
-		goto bad1;
+		goto bad_register_origin_target;
+	}
+
+	r = dm_register_target(&merge_target);
+	if (r < 0) {
+		DMERR("Merge target register failed %d", r);
+		goto bad_register_merge_target;
 	}
 
 	r = init_origin_hash();
 	if (r) {
 		DMERR("init_origin_hash failed.");
-		goto bad2;
+		goto bad_origin_hash;
 	}
 
 	exception_cache = KMEM_CACHE(dm_exception, 0);
 	if (!exception_cache) {
 		DMERR("Couldn't create exception cache.");
 		r = -ENOMEM;
-		goto bad3;
+		goto bad_exception_cache;
 	}
 
 	pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
 	if (!pending_cache) {
 		DMERR("Couldn't create pending cache.");
 		r = -ENOMEM;
-		goto bad4;
+		goto bad_pending_cache;
 	}
 
 	tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
 	if (!tracked_chunk_cache) {
 		DMERR("Couldn't create cache to track chunks in use.");
 		r = -ENOMEM;
-		goto bad5;
+		goto bad_tracked_chunk_cache;
 	}
 
 	ksnapd = create_singlethread_workqueue("ksnapd");
@@ -1639,19 +1658,21 @@ static int __init dm_snapshot_init(void)
 
 bad_pending_pool:
 	kmem_cache_destroy(tracked_chunk_cache);
-bad5:
+bad_tracked_chunk_cache:
 	kmem_cache_destroy(pending_cache);
-bad4:
+bad_pending_cache:
 	kmem_cache_destroy(exception_cache);
-bad3:
+bad_exception_cache:
 	exit_origin_hash();
-bad2:
+bad_origin_hash:
+	dm_unregister_target(&merge_target);
+bad_register_merge_target:
 	dm_unregister_target(&origin_target);
-bad1:
+bad_register_origin_target:
 	dm_unregister_target(&snapshot_target);
-
 bad_register_snapshot_target:
 	dm_exception_store_exit();
+
 	return r;
 }
 
@@ -1661,6 +1682,7 @@ static void __exit dm_snapshot_exit(void)
 
 	dm_unregister_target(&snapshot_target);
 	dm_unregister_target(&origin_target);
+	dm_unregister_target(&merge_target);
 
 	exit_origin_hash();
 	kmem_cache_destroy(pending_cache);
-- 
1.6.5.rc2

* [PATCH v2 09/17] dm snapshot: merge target should not allocate new exceptions
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

Add a new map method, snapshot_merge_map, that does not allocate new
exceptions, and modify __origin_write so that it does not allocate
exceptions in merging snapshots either.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 45 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index d83a26e..ea3ff81 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -147,6 +147,11 @@ static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
 	return lhs == rhs;
 }
 
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+			       union map_info *map_context);
+
+#define is_merge(ti)	((ti)->type->map == snapshot_merge_map)
+
 struct dm_snap_pending_exception {
 	struct dm_exception e;
 
@@ -1239,6 +1244,40 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 	return r;
 }
 
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+			       union map_info *map_context)
+{
+	struct dm_exception *e;
+	struct dm_snapshot *s = ti->private;
+	int r = DM_MAPIO_REMAPPED;
+	chunk_t chunk;
+
+	chunk = sector_to_chunk(s->store, bio->bi_sector);
+
+	down_read(&s->lock);
+
+	/* Full snapshots are not usable */
+	if (!s->valid) {
+		r = -EIO;
+		goto out_unlock;
+	}
+
+	/* If the block is already remapped - use that */
+	e = dm_lookup_exception(&s->complete, chunk);
+	if (e) {
+		remap_exception(s, e, bio, chunk);
+		goto out_unlock;
+	}
+
+	bio->bi_bdev = s->origin->bdev;
+
+ out_unlock:
+	up_read(&s->lock);
+
+	return r;
+}
+
+
 static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
 			   int error, union map_info *map_context)
 {
@@ -1358,6 +1397,11 @@ static int __origin_write(struct list_head *snapshots,
 	/* Do all the snapshots on this origin */
 	list_for_each_entry (snap, snapshots, list) {
 
+		/* If the snapshot is merging, don't make new exceptions in it
+		   - but in this case no one should be writing to the origin */
+		if (is_merge(snap->ti))
+			continue;
+
 		down_write(&snap->lock);
 
 		/* Only deal with valid and active snapshots */
@@ -1585,7 +1629,7 @@ static struct target_type merge_target = {
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
-	.map     = snapshot_map,
+	.map     = snapshot_merge_map,
 	.end_io  = snapshot_end_io,
 	.resume  = snapshot_resume,
 	.status  = snapshot_status,
-- 
1.6.5.rc2

* [PATCH v2 10/17] dm snapshot: do not allow more than one merging snapshot.
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

It is pointless to merge two snapshots of the same origin
simultaneously, and allowing multiple merging snapshots would
complicate things later.  Also add a function to find the merging
snapshot for a given origin device.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   23 +++++++++++++++++++++++
 1 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ea3ff81..c422355 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -305,6 +305,22 @@ static void __insert_origin(struct origin *o)
 	list_add_tail(&o->hash_list, sl);
 }
 
+static struct dm_snapshot *__find_merging_snapshot(struct block_device *origin)
+{
+	struct origin *o;
+	struct dm_snapshot *snap;
+
+	o = __lookup_origin(origin);
+	if (!o)
+		return NULL;
+
+	list_for_each_entry(snap, &o->snapshots, list)
+		if (is_merge(snap->ti))
+			return snap;
+
+	return NULL;
+}
+
 /*
  * Make a note of the snapshot and its origin so we can look it
  * up when the origin has a write on it.
@@ -320,6 +336,13 @@ static int register_snapshot(struct dm_snapshot *snap)
 		return -ENOMEM;
 
 	down_write(&_origins_lock);
+
+	/* Do not allow more than one merging snapshot */
+	if (is_merge(snap->ti) && __find_merging_snapshot(bdev)) {
+		up_write(&_origins_lock);
+		return -EINVAL;
+	}
+
 	o = __lookup_origin(bdev);
 
 	if (o)
-- 
1.6.5.rc2

* [PATCH v2 11/17] dm snapshot: the merge procedure
From: Mike Snitzer @ 2009-10-20 22:46 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

We don't need a separate thread; kcopyd does the job just fine
(provided that we have a private kcopyd client).

Merging is started when the merging snapshot is resumed and it is
stopped when the merging snapshot is suspended or destroyed.

Merging is not yet interlocked with writes, so there is a race
condition with concurrent access.  It is fixed in later patches of this
series.

Also add a supporting function to decrement the consecutive chunk
counter.  Care is taken to increment the exception's old_chunk and
new_chunk, prior to the dm_consecutive_chunk_count_dec() call, if the
chunk being merged is at the start of the exception's consecutive chunk
range.  This allows snapshot-merge to support chunks that are added to
the 'complete' exception hash table before existing chunks.
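
A toy model of that consecutive-chunk bookkeeping (the plain
'consecutive' field stands in for the count that the real code packs
into the top bits of new_chunk; the values are made up):

#include <stdio.h>

struct exception {
	unsigned long long old_chunk, new_chunk;
	unsigned consecutive;	/* chunks following old_chunk/new_chunk */
};

/* Merge (remove) one chunk from an exception covering a range; only
 * the first or the last chunk of the range may be merged. */
static void merge_one(struct exception *e, unsigned long long old_chunk)
{
	if (old_chunk == e->old_chunk) {
		/* merging the first chunk: advance the range start
		 * before shortening it */
		e->old_chunk++;
		e->new_chunk++;
	}
	/* merging the last chunk needs no adjustment */
	e->consecutive--;
}

int main(void)
{
	struct exception e = { .old_chunk = 10, .new_chunk = 40,
			       .consecutive = 2 };	/* covers 10,11,12 */

	merge_one(&e, 12);	/* from the end: now covers 10,11 */
	merge_one(&e, 10);	/* from the start: now covers 11 only */
	printf("range now starts at old %llu (new %llu), +%u more\n",
	       e.old_chunk, e.new_chunk, e.consecutive);
	return 0;
}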

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-exception-store.h |   11 +++
 drivers/md/dm-snap.c            |  161 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 171 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 9cdc08b..4f18902 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -157,6 +157,13 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
 	BUG_ON(!dm_consecutive_chunk_count(e));
 }
 
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
+{
+	BUG_ON(!dm_consecutive_chunk_count(e));
+
+	e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS);
+}
+
 #  else
 #    define DM_CHUNK_CONSECUTIVE_BITS 0
 
@@ -174,6 +181,10 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
 {
 }
 
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
+{
+}
+
 #  endif
 
 /*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c422355..55f5dec 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -121,6 +121,13 @@ struct dm_snapshot {
 	mempool_t *tracked_chunk_pool;
 	spinlock_t tracked_chunk_lock;
 	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+
+	/* Merge operation is in progress */
+	int merge_running;
+
+	/* It is requested to shut down merging */
+	/* Cleared back to 0 when the merging is stopped */
+	int merge_shutdown;
 };
 
 struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
@@ -649,6 +656,124 @@ static int init_hash_tables(struct dm_snapshot *s)
 	return 0;
 }
 
+static void merge_callback(int read_err, unsigned long write_err,
+			   void *context);
+
+static void snapshot_merge_process(struct dm_snapshot *s)
+{
+	int r;
+	chunk_t old_chunk, new_chunk;
+	struct dm_exception *e;
+	struct dm_io_region src, dest;
+
+	BUG_ON(!s->merge_running);
+	if (s->merge_shutdown)
+		goto shut;
+
+	if (!s->valid) {
+		DMERR("snapshot is invalid, can't merge");
+		goto shut;
+	}
+
+	r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
+	if (r <= 0) {
+		if (r < 0)
+			DMERR("Read error in exception store, "
+			      "shutting down merge");
+		goto shut;
+	}
+
+	/* TODO: use larger I/O size once we verify that kcopyd handles it */
+
+	/* !!! FIXME: interlock writes to this chunk */
+	down_write(&s->lock);
+	e = dm_lookup_exception(&s->complete, old_chunk);
+	if (!e) {
+		DMERR("exception for block %llu is on disk but not in memory",
+		      (unsigned long long)old_chunk);
+		up_write(&s->lock);
+		goto shut;
+	}
+	if (dm_consecutive_chunk_count(e)) {
+		if (old_chunk == e->old_chunk) {
+			e->old_chunk++;
+			e->new_chunk++;
+		} else if (old_chunk != e->old_chunk +
+			   dm_consecutive_chunk_count(e)) {
+			DMERR("merge from the middle of a chunk range");
+			up_write(&s->lock);
+			goto shut;
+		}
+		dm_consecutive_chunk_count_dec(e);
+	} else {
+		dm_remove_exception(e);
+		free_completed_exception(e);
+	}
+	up_write(&s->lock);
+
+	dest.bdev = s->origin->bdev;
+	dest.sector = chunk_to_sector(s->store, old_chunk);
+	dest.count = min((sector_t)s->store->chunk_size,
+			 get_dev_size(dest.bdev) - dest.sector);
+
+	src.bdev = s->cow->bdev;
+	src.sector = chunk_to_sector(s->store, new_chunk);
+	src.count = dest.count;
+
+	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
+	return;
+
+shut:
+	s->merge_running = 0;
+}
+
+static void merge_callback(int read_err, unsigned long write_err, void *context)
+{
+	int r;
+	struct dm_snapshot *s = context;
+
+	if (read_err || write_err) {
+		if (read_err)
+			DMERR("Read error in data, shutting down merge");
+		else
+			DMERR("Write error in data, shutting down merge");
+		goto shut;
+	}
+
+	r = s->store->type->commit_merge(s->store, 1);
+	if (r < 0) {
+		DMERR("Write error in exception store, shutting down merge");
+		goto shut;
+	}
+
+	snapshot_merge_process(s);
+	return;
+
+shut:
+	s->merge_running = 0;
+}
+
+static void start_merge(struct dm_snapshot *merging)
+{
+	if (!merging->merge_running && !merging->merge_shutdown) {
+		merging->merge_running = 1;
+		snapshot_merge_process(merging);
+	}
+}
+
+/*
+ * Stop the merging process and wait until it finishes.
+ */
+
+static void stop_merge(struct dm_snapshot *merging)
+{
+	while (merging->merge_running) {
+		merging->merge_shutdown = 1;
+		msleep(1);
+	}
+	merging->merge_shutdown = 0;
+}
+
 /*
  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
  */
@@ -712,6 +837,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	atomic_set(&s->pending_exceptions_count, 0);
 	s->handover = 0;
 	s->handover_snap = NULL;
+	s->merge_running = 0;
+	s->merge_shutdown = 0;
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
 
@@ -758,11 +885,23 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 				    "constructed with the same cow device.";
 			goto bad_load_and_register;
 		}
+
+		if (is_merge(ti) &&
+		    (!dup->store->type->prepare_merge ||
+		     !dup->store->type->commit_merge)) {
+			ti->error =
+				"Merging snapshot must support snapshot-merge";
+			goto bad_load_and_register;
+		}
+
 		/* cross reference snapshots that will do handover */
 		dup->handover_snap = s;
 		s->handover_snap = dup;
 		/* this new snapshot will accept the handover */
 		s->handover = 1;
+	} else if (is_merge(ti)) {
+		ti->error = "Unable to find snapshot that is to be merged";
+		goto bad_load_and_register;
 	}
 
 	/* Metadata must only be loaded into one table at once */
@@ -889,6 +1028,9 @@ static void snapshot_dtr(struct dm_target *ti)
 	}
 	up_write(&s->lock);
 
+	if (is_merge(ti))
+		stop_merge(s);
+
 	/* Prevent further origin writes from using this snapshot. */
 	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
@@ -1342,6 +1484,22 @@ static void snapshot_resume(struct dm_target *ti)
 	up_write(&s->lock);
 }
 
+static void snapshot_merge_resume(struct dm_target *ti)
+{
+	struct dm_snapshot *s = ti->private;
+
+	snapshot_resume(ti);
+	start_merge(s);
+}
+
+static void snapshot_merge_presuspend(struct dm_target *ti)
+{
+	struct dm_snapshot *s = ti->private;
+
+	snapshot_presuspend(ti);
+	stop_merge(s);
+}
+
 static int snapshot_status(struct dm_target *ti, status_type_t type,
 			   char *result, unsigned int maxlen)
 {
@@ -1654,7 +1812,8 @@ static struct target_type merge_target = {
 	.dtr     = snapshot_dtr,
 	.map     = snapshot_merge_map,
 	.end_io  = snapshot_end_io,
-	.resume  = snapshot_resume,
+	.presuspend = snapshot_merge_presuspend,
+	.resume  = snapshot_merge_resume,
 	.status  = snapshot_status,
 	.iterate_devices = snapshot_iterate_devices,
 };
-- 
1.6.5.rc2

* [PATCH v2 12/17] dm snapshot: queue writes to an area that is actively being merged
From: Mike Snitzer @ 2009-10-20 22:47 UTC
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

The new variables merge_write_interlock and merge_write_interlock_n
record the chunk number (on the origin device) and the number of chunks
currently being merged.  Writes to this area are queued on
merge_write_list.
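
To make that concrete, here is a minimal standalone C model of the
window check (struct merge_window and write_must_wait() are
illustrative stand-ins for the dm_snapshot fields and code, not the
kernel implementation itself):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t chunk_t;

/* Models the two new dm_snapshot fields. */
struct merge_window {
        chunk_t start;  /* merge_write_interlock   */
        int n;          /* merge_write_interlock_n */
};

/* A write to 'chunk' must be queued while the merge copy covers it. */
static bool write_must_wait(const struct merge_window *w, chunk_t chunk)
{
        return w->n && chunk >= w->start &&
               chunk < w->start + (chunk_t)w->n;
}

int main(void)
{
        struct merge_window w = { .start = 10, .n = 1 };

        printf("chunk 10: %s\n", write_must_wait(&w, 10) ? "queue" : "remap");
        printf("chunk 11: %s\n", write_must_wait(&w, 11) ? "queue" : "remap");
        return 0;
}

Writes that fall inside the window are added to merge_write_list and
are flushed (or errored) when the interlock is released.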

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |  119 +++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 88 insertions(+), 31 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 55f5dec..95c45ca 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -128,6 +128,16 @@ struct dm_snapshot {
 	/* It is requested to shut down merging */
 	/* Cleared back to 0 when the merging is stopped */
 	int merge_shutdown;
+
+	/* Merging this area --- block any writes */
+	chunk_t merge_write_interlock;
+	int merge_write_interlock_n;
+
+	/*
+	 * A list of requests that were delayed because
+	 * of racing with merge
+	 */
+	struct bio_list merge_write_list;
 };
 
 struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
@@ -656,6 +666,9 @@ static int init_hash_tables(struct dm_snapshot *s)
 	return 0;
 }
 
+static void flush_bios(struct bio *bio);
+static void error_bios(struct bio *bio);
+
 static void merge_callback(int read_err, unsigned long write_err,
 			   void *context);
 
@@ -663,7 +676,6 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 {
 	int r;
 	chunk_t old_chunk, new_chunk;
-	struct dm_exception *e;
 	struct dm_io_region src, dest;
 
 	BUG_ON(!s->merge_running);
@@ -685,32 +697,6 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 
 	/* TODO: use larger I/O size once we verify that kcopyd handles it */
 
-	/* !!! FIXME: intelock writes to this chunk */
-	down_write(&s->lock);
-	e = dm_lookup_exception(&s->complete, old_chunk);
-	if (!e) {
-		DMERR("exception for block %llu is on disk but not in memory",
-		      (unsigned long long)old_chunk);
-		up_write(&s->lock);
-		goto shut;
-	}
-	if (dm_consecutive_chunk_count(e)) {
-		if (old_chunk == e->old_chunk) {
-			e->old_chunk++;
-			e->new_chunk++;
-		} else if (old_chunk != e->old_chunk +
-			   dm_consecutive_chunk_count(e)) {
-			DMERR("merge from the middle of a chunk range");
-			up_write(&s->lock);
-			goto shut;
-		}
-		dm_consecutive_chunk_count_dec(e);
-	} else {
-		dm_remove_exception(e);
-		free_completed_exception(e);
-	}
-	up_write(&s->lock);
-
 	dest.bdev = s->origin->bdev;
 	dest.sector = chunk_to_sector(s->store, old_chunk);
 	dest.count = min((sector_t)s->store->chunk_size,
@@ -720,6 +706,13 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 	src.sector = chunk_to_sector(s->store, new_chunk);
 	src.count = dest.count;
 
+	down_write(&s->lock);
+	s->merge_write_interlock = old_chunk;
+	s->merge_write_interlock_n = 1;
+	up_write(&s->lock);
+
+	/* !!! FIXME: wait until writes to this chunk drain */
+
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
 	return;
 
@@ -727,10 +720,25 @@ shut:
 	s->merge_running = 0;
 }
 
+/* This function drops s->lock */
+static inline void release_write_interlock(struct dm_snapshot *s, int err)
+{
+	struct bio *b;
+	s->merge_write_interlock = 0;
+	s->merge_write_interlock_n = 0;
+	b = bio_list_get(&s->merge_write_list);
+	up_write(&s->lock);
+	if (!err)
+		flush_bios(b);
+	else
+		error_bios(b);
+}
+
 static void merge_callback(int read_err, unsigned long write_err, void *context)
 {
-	int r;
+	int r, i;
 	struct dm_snapshot *s = context;
+	struct dm_exception *e;
 
 	if (read_err || write_err) {
 		if (read_err)
@@ -740,16 +748,51 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
 		goto shut;
 	}
 
-	r = s->store->type->commit_merge(s->store, 1);
+	r = s->store->type->commit_merge(s->store, s->merge_write_interlock_n);
 	if (r < 0) {
 		DMERR("Write error in exception store, shutting down merge");
 		goto shut;
 	}
 
+	down_write(&s->lock);
+	/*
+	 * Must process chunks (and associated exceptions) in reverse
+	 * so that dm_consecutive_chunk_count_dec() accounting works
+	 */
+	for (i = s->merge_write_interlock_n - 1; i >= 0; i--) {
+		chunk_t old_chunk = s->merge_write_interlock + i;
+		e = dm_lookup_exception(&s->complete, old_chunk);
+		if (!e) {
+			DMERR("exception for block %llu is on "
+			      "disk but not in memory",
+			      (unsigned long long)old_chunk);
+			up_write(&s->lock);
+			goto shut;
+		}
+		if (dm_consecutive_chunk_count(e)) {
+			if (old_chunk == e->old_chunk) {
+				e->old_chunk++;
+				e->new_chunk++;
+			} else if (old_chunk != e->old_chunk +
+				   dm_consecutive_chunk_count(e)) {
+				DMERR("merge from the middle of a chunk range");
+				up_write(&s->lock);
+				goto shut;
+			}
+			dm_consecutive_chunk_count_dec(e);
+		} else {
+			dm_remove_exception(e);
+			free_completed_exception(e);
+		}
+	}
+	release_write_interlock(s, 0);
+
 	snapshot_merge_process(s);
 	return;
 
 shut:
+	down_write(&s->lock);
+	release_write_interlock(s, 1);
 	s->merge_running = 0;
 }
 
@@ -841,6 +884,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->merge_shutdown = 0;
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
+	s->merge_write_interlock = 0;
+	s->merge_write_interlock_n = 0;
+	bio_list_init(&s->merge_write_list);
 
 	/* Allocate hash table for COW data */
 	if (init_hash_tables(s)) {
@@ -1419,7 +1465,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 
 	chunk = sector_to_chunk(s->store, bio->bi_sector);
 
-	down_read(&s->lock);
+	down_write(&s->lock);
 
 	/* Full snapshots are not usable */
 	if (!s->valid) {
@@ -1430,6 +1476,17 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 	/* If the block is already remapped - use that */
 	e = dm_lookup_exception(&s->complete, chunk);
 	if (e) {
+		/* We are copying this area --- so don't write to it */
+		if (bio_rw(bio) == WRITE &&
+		    chunk >= s->merge_write_interlock &&
+		    chunk < (s->merge_write_interlock +
+			     s->merge_write_interlock_n)) {
+			bio->bi_bdev = s->origin->bdev;
+			bio_list_add(&s->merge_write_list, bio);
+
+			r = DM_MAPIO_SUBMITTED;
+			goto out_unlock;
+		}
 		remap_exception(s, e, bio, chunk);
 		goto out_unlock;
 	}
@@ -1437,7 +1494,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 	bio->bi_bdev = s->origin->bdev;
 
  out_unlock:
-	up_read(&s->lock);
+	up_write(&s->lock);
 
 	return r;
 }
-- 
1.6.5.rc2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 13/17] dm snapshot: do not merge a chunk until active writes to it finish
  2009-10-20 22:46 [PATCH v2 00/17] snapshot-merge target Mike Snitzer
                   ` (11 preceding siblings ...)
  2009-10-20 22:47 ` [PATCH v2 12/17] dm snapshot: queue writes to an area that is actively being merged Mike Snitzer
@ 2009-10-20 22:47 ` Mike Snitzer
  2009-10-20 22:47 ` [PATCH v2 14/17] dm snapshot: make exceptions in other snapshots when merging Mike Snitzer
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2009-10-20 22:47 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

Track in-progress writes on the merging snapshot device and delay
merging the chunk until all writes to that chunk finish.
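
Roughly (a userspace sketch with invented names, standing in for the
tracked-chunk hash and msleep() loop used by the kernel code), the idea
is a per-chunk count of writes in flight that the merge path polls
until it drains:

#include <stdint.h>

#define NCHUNKS 1024

/* Stands in for the kernel's tracked-chunk hash table. */
static unsigned int writes_in_flight[NCHUNKS];

/* I/O path: bracket every write that maps to a remapped chunk. */
void track_write(uint64_t chunk)   { writes_in_flight[chunk]++; }
void untrack_write(uint64_t chunk) { writes_in_flight[chunk]--; }

/* Merge path: a chunk may only be copied back once this returns 0. */
int chunk_is_busy(uint64_t chunk)
{
        return writes_in_flight[chunk] != 0;
}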

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 95c45ca..72ff282 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -711,7 +711,8 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 	s->merge_write_interlock_n = 1;
 	up_write(&s->lock);
 
-	/* !!! FIXME: wait until writes to this chunk drain */
+	while (__chunk_is_tracked(s, old_chunk))
+		msleep(1);
 
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
 	return;
@@ -1487,7 +1488,11 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 			r = DM_MAPIO_SUBMITTED;
 			goto out_unlock;
 		}
+
 		remap_exception(s, e, bio, chunk);
+
+		if (bio_rw(bio) == WRITE)
+			map_context->ptr = track_chunk(s, chunk);
 		goto out_unlock;
 	}
 
-- 
1.6.5.rc2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 14/17] dm snapshot: make exceptions in other snapshots when merging
  2009-10-20 22:46 [PATCH v2 00/17] snapshot-merge target Mike Snitzer
                   ` (12 preceding siblings ...)
  2009-10-20 22:47 ` [PATCH v2 13/17] dm snapshot: do not merge a chunk until active writes to it finish Mike Snitzer
@ 2009-10-20 22:47 ` Mike Snitzer
  2009-10-20 22:47 ` [PATCH v2 15/17] dm snapshot: make exceptions if merge is dispatching to origin Mike Snitzer
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2009-10-20 22:47 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

When one merging snapshot coexists with other non-merging snapshots of
the same origin, create exceptions in the non-merging snapshots while
merging.

This is not yet complete (the exception is not made when remapping
directly to the origin); a later patch in this series fixes that.
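
The shape of the retry loop is roughly the following (a standalone
sketch; cow_pending_at() and region_ready_for_merge() are made-up
stand-ins for __origin_write() and the in-kernel loop, not the dm-snap
API):

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t sector_t;

/*
 * Stub standing in for __origin_write(): in the kernel this triggers
 * copy-on-write for 'sector' in every other snapshot of the origin and
 * reports whether a copy is still pending.
 */
static bool cow_pending_at(sector_t sector)
{
        (void)sector;
        return false;
}

/*
 * Walk the region being merged in steps of the smallest chunk size of
 * any snapshot on this origin.  The kcopyd copy back to the origin may
 * only start once this returns true; the caller sleeps and retries
 * while it returns false.
 */
bool region_ready_for_merge(sector_t start, sector_t len,
                            sector_t min_chunksize)
{
        sector_t n;

        if (!min_chunksize)     /* no other snapshots registered */
                return true;

        for (n = 0; n < len; n += min_chunksize)
                if (cow_pending_at(start + n))
                        return false;

        return true;
}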

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |   31 +++++++++++++++++++++++++++++++
 1 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 72ff282..7f0d261 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -275,6 +275,8 @@ struct origin {
 static struct list_head *_origins;
 static struct rw_semaphore _origins_lock;
 
+static DECLARE_WAIT_QUEUE_HEAD(_pending_exception_done);
+
 static int init_origin_hash(void)
 {
 	int i;
@@ -669,6 +671,9 @@ static int init_hash_tables(struct dm_snapshot *s)
 static void flush_bios(struct bio *bio);
 static void error_bios(struct bio *bio);
 
+static int __origin_write(struct list_head *snapshots,
+			  sector_t sector, struct bio *bio);
+
 static void merge_callback(int read_err, unsigned long write_err,
 			   void *context);
 
@@ -676,6 +681,9 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 {
 	int r;
 	chunk_t old_chunk, new_chunk;
+	struct origin *o;
+	chunk_t min_chunksize;
+	int must_wait;
 	struct dm_io_region src, dest;
 
 	BUG_ON(!s->merge_running);
@@ -706,6 +714,27 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 	src.sector = chunk_to_sector(s->store, new_chunk);
 	src.count = dest.count;
 
+test_again:
+	/* Reallocate other snapshots */
+	down_read(&_origins_lock);
+	o = __lookup_origin(s->origin->bdev);
+	must_wait = 0;
+	min_chunksize = __minimum_chunk_size(o);
+	if (min_chunksize) {
+		chunk_t n;
+		for (n = 0; n < s->store->chunk_size; n += min_chunksize) {
+			r = __origin_write(&o->snapshots, dest.sector + n,
+					   NULL);
+			if (r == DM_MAPIO_SUBMITTED)
+				must_wait = 1;
+		}
+	}
+	up_read(&_origins_lock);
+	if (must_wait) {
+		sleep_on_timeout(&_pending_exception_done, HZ / 100 + 1);
+		goto test_again;
+	}
+
 	down_write(&s->lock);
 	s->merge_write_interlock = old_chunk;
 	s->merge_write_interlock_n = 1;
@@ -1245,6 +1274,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 	origin_bios = bio_list_get(&pe->origin_bios);
 	free_pending_exception(pe);
 
+	wake_up_all(&_pending_exception_done);
+
 	up_write(&s->lock);
 
 	/* Submit any pending write bios */
-- 
1.6.5.rc2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 15/17] dm snapshot: make exceptions if merge is dispatching to origin
  2009-10-20 22:46 [PATCH v2 00/17] snapshot-merge target Mike Snitzer
                   ` (13 preceding siblings ...)
  2009-10-20 22:47 ` [PATCH v2 14/17] dm snapshot: make exceptions in other snapshots when merging Mike Snitzer
@ 2009-10-20 22:47 ` Mike Snitzer
  2009-10-20 22:47 ` [PATCH v2 16/17] dm snapshot: redirect accesses to origin if merging snap invalidated Mike Snitzer
  2009-10-20 22:47 ` [PATCH v2 17/17] dm snapshot: merge a linear region of chunks using one large IO Mike Snitzer
  16 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2009-10-20 22:47 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

If a write request to the merging snapshot device is to be dispatched
directly to the origin (because the chunk is not remapped or has
already been merged), we must make exceptions in the other snapshots.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7f0d261..f67bfff 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1529,6 +1529,11 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 
 	bio->bi_bdev = s->origin->bdev;
 
+	if (bio_rw(bio) == WRITE) {
+		up_write(&s->lock);
+		return do_origin(s->origin, bio);
+	}
+
  out_unlock:
 	up_write(&s->lock);
 
-- 
1.6.5.rc2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 16/17] dm snapshot: redirect accesses to origin if merging snap invalidated
  2009-10-20 22:46 [PATCH v2 00/17] snapshot-merge target Mike Snitzer
                   ` (14 preceding siblings ...)
  2009-10-20 22:47 ` [PATCH v2 15/17] dm snapshot: make exceptions if merge is dispatching to origin Mike Snitzer
@ 2009-10-20 22:47 ` Mike Snitzer
  2009-10-20 22:47 ` [PATCH v2 17/17] dm snapshot: merge a linear region of chunks using one large IO Mike Snitzer
  16 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2009-10-20 22:47 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka

From: Mikulas Patocka <mpatocka@redhat.com>

If we are merging an invalidated snapshot, redirect all accesses to the
origin device.

Userspace will remove the invalidated snapshot after a few seconds, but
during this period the origin device must remain usable (it may, for
example, contain the root filesystem, and an unbootable machine is not
desirable).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f67bfff..9ef5e5c 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1499,11 +1499,9 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 
 	down_write(&s->lock);
 
-	/* Full snapshots are not usable */
-	if (!s->valid) {
-		r = -EIO;
-		goto out_unlock;
-	}
+	/* Full merging snapshots are redirected to the origin */
+	if (!s->valid)
+		goto redirect_to_origin;
 
 	/* If the block is already remapped - use that */
 	e = dm_lookup_exception(&s->complete, chunk);
@@ -1527,6 +1525,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 		goto out_unlock;
 	}
 
+ redirect_to_origin:
 	bio->bi_bdev = s->origin->bdev;
 
 	if (bio_rw(bio) == WRITE) {
-- 
1.6.5.rc2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v2 17/17] dm snapshot: merge a linear region of chunks using one large IO
  2009-10-20 22:46 [PATCH v2 00/17] snapshot-merge target Mike Snitzer
                   ` (15 preceding siblings ...)
  2009-10-20 22:47 ` [PATCH v2 16/17] dm snapshot: redirect accesses to origin if merging snap invalidated Mike Snitzer
@ 2009-10-20 22:47 ` Mike Snitzer
  16 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2009-10-20 22:47 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka

s->store->type->prepare_merge returns the number of chunks that can be
copied linearly, working backward from the returned chunk number.  (The
caller is allowed to copy fewer chunks, and it passes the number of
chunks actually copied to s->store->type->commit_merge.)

I.e. if the returned chunk numbers are old_chunk == 10 and new_chunk ==
20 and the returned value is 3, then chunk 20 can be copied to 10,
chunk 19 to 9, and chunk 18 to 8.
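
A standalone sketch of that arithmetic (region_from_prepare_merge() and
struct merge_region are invented for illustration; only prepare_merge's
return convention is taken from the description above):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t chunk_t;

struct merge_region {
        chunk_t old_start;      /* first origin chunk to copy back to */
        chunk_t new_start;      /* first COW chunk to copy from       */
        int count;              /* number of consecutive chunks       */
};

/*
 * prepare_merge() reports the *last* chunk of a linear run and how
 * many chunks the run spans; rewind both numbers to the start of it.
 */
static struct merge_region region_from_prepare_merge(chunk_t old_chunk,
                                                     chunk_t new_chunk,
                                                     int linear_chunks)
{
        struct merge_region r = {
                .old_start = old_chunk + 1 - linear_chunks,
                .new_start = new_chunk + 1 - linear_chunks,
                .count = linear_chunks,
        };
        return r;
}

int main(void)
{
        /* The example above: chunks 18..20 copy back to 8..10. */
        struct merge_region r = region_from_prepare_merge(10, 20, 3);

        printf("copy %d chunks: cow %llu.. -> origin %llu..\n", r.count,
               (unsigned long long)r.new_start,
               (unsigned long long)r.old_start);
        return 0;
}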

s->merge_write_interlock_n may now be increased up to the full range of
chunks returned by s->store->type->prepare_merge.  Until now kcopyd was
only ever allowed to copy one chunk at a time, so snapshot-merge
performance was extremely slow.

Also, snapshot_merge_process() now delays the merging of _all_ chunks
that have in-progress writes, not just the first chunk in the region
being merged.


Here are performance results from some mkfs-based testing:

# lvcreate -n testlv -L 32G test
# lvcreate -n testlv_snap -s -L 7G test/testlv

# time mkfs.ext3 /dev/test/testlv
...
real    1m7.827s
user    0m0.116s
sys     0m11.017s

# lvs
 LV          VG   Attr   LSize  Origin Snap%  Move Log Copy%  Convert
 testlv      test owi-a- 32.00G
 testlv_snap test swi-a-  7.00G testlv   9.05

before:
-------
# time lvconvert --merge test/testlv_snap
 Merging of volume testlv_snap started.
 ...
 Merge into logical volume testlv finished.
 Logical volume "snapshot1" successfully removed

real    22m33.100s
user    0m0.045s
sys     0m0.711s


after:
------
# time lvconvert --merge test/testlv_snap
 Merging of volume testlv_snap started.
 testlv: Merged: 6.4%
 testlv: Merged: 3.5%
 testlv: Merged: 0.9%
 testlv: Merged: 0.0%
 Merge into logical volume testlv finished.
 Logical volume "snapshot1" successfully removed

real    1m0.881s
user    0m0.015s
sys     0m0.560s


So we're now seeing _very_ respectable snapshot-merge performance.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-snap.c |   35 +++++++++++++++++++++++------------
 1 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9ef5e5c..fca2479 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -679,12 +679,13 @@ static void merge_callback(int read_err, unsigned long write_err,
 
 static void snapshot_merge_process(struct dm_snapshot *s)
 {
-	int r;
+	int r, i, linear_chunks;
 	chunk_t old_chunk, new_chunk;
 	struct origin *o;
 	chunk_t min_chunksize;
 	int must_wait;
 	struct dm_io_region src, dest;
+	sector_t io_size;
 
 	BUG_ON(!s->merge_running);
 	if (s->merge_shutdown)
@@ -695,34 +696,41 @@ static void snapshot_merge_process(struct dm_snapshot *s)
 		goto shut;
 	}
 
-	r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
-	if (r <= 0) {
-		if (r < 0)
+	linear_chunks = s->store->type->prepare_merge(s->store,
+						      &old_chunk, &new_chunk);
+	if (linear_chunks <= 0) {
+		if (linear_chunks < 0)
 			DMERR("Read error in exception store, "
 			      "shutting down merge");
 		goto shut;
 	}
+	/* Adjust old_chunk and new_chunk to reflect start of linear region */
+	old_chunk = old_chunk + 1 - linear_chunks;
+	new_chunk = new_chunk + 1 - linear_chunks;
 
-	/* TODO: use larger I/O size once we verify that kcopyd handles it */
+	/*
+	 * Use one (potentially large) I/O to copy all 'linear_chunks'
+	 * from the exception store to the origin
+	 */
+	io_size = linear_chunks * s->store->chunk_size;
 
 	dest.bdev = s->origin->bdev;
 	dest.sector = chunk_to_sector(s->store, old_chunk);
-	dest.count = min((sector_t)s->store->chunk_size,
-			 get_dev_size(dest.bdev) - dest.sector);
+	dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
 
 	src.bdev = s->cow->bdev;
 	src.sector = chunk_to_sector(s->store, new_chunk);
 	src.count = dest.count;
 
 test_again:
-	/* Reallocate other snapshots */
+	/* Reallocate other snapshots; must account for all 'linear_chunks' */
 	down_read(&_origins_lock);
 	o = __lookup_origin(s->origin->bdev);
 	must_wait = 0;
 	min_chunksize = __minimum_chunk_size(o);
 	if (min_chunksize) {
 		chunk_t n;
-		for (n = 0; n < s->store->chunk_size; n += min_chunksize) {
+		for (n = 0; n < io_size; n += min_chunksize) {
 			r = __origin_write(&o->snapshots, dest.sector + n,
 					   NULL);
 			if (r == DM_MAPIO_SUBMITTED)
@@ -737,11 +745,14 @@ test_again:
 
 	down_write(&s->lock);
 	s->merge_write_interlock = old_chunk;
-	s->merge_write_interlock_n = 1;
+	s->merge_write_interlock_n = linear_chunks;
 	up_write(&s->lock);
 
-	while (__chunk_is_tracked(s, old_chunk))
-		msleep(1);
+	/* Wait until writes to all 'linear_chunks' drain */
+	for (i = 0; i < linear_chunks; i++) {
+		while (__chunk_is_tracked(s, old_chunk + i))
+			msleep(1);
+	}
 
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
 	return;
-- 
1.6.5.rc2

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 04/17] dm snapshot: exception handover improvements
  2009-10-20 22:46 ` [PATCH v2 04/17] dm snapshot: exception handover improvements Mike Snitzer
@ 2009-10-23 15:01   ` Mike Snitzer
  0 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2009-10-23 15:01 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka, Alasdair G Kergon

On Tue, Oct 20 2009 at  6:46pm -0400,
Mike Snitzer <snitzer@redhat.com> wrote:

> Simplify exception handover so that the duplicate snapshot is found in
> one location (snapshot_ctr) and recorded in the associated (old and new)
> snapshots that will be participating in the handover.
> 
> Confines handover to be from a specific snapshot to another specific
> snapshot without additional __find_duplicate() calls.
> 
> Documents the exception handover state diagram with relevant comments
> in the associated code.

In testing --onactivate support I found that my revised handover
incorrectly assumed that the snapshot-merge target _always_ needs the
exceptions explicitly handed over to it.

In reality --onactivate doesn't require any handover at all.  The
snapshot_ctr() for such a merge is provided with the correct cow_path up
front, so no handover is needed.

I also found that I was missing some locking when changing the "old"
snapshot's 'handover_snap' reference.

The following incremental patch addresses both these oversights; I'll be
posting v3 of this "dm snapshot: exception handover improvements"
shortly (it will include these fixes).

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index fca2479..e0330e3 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -982,13 +982,13 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		}
 
 		/* cross reference snapshots that will do handover */
+		down_write(&dup->lock);
 		dup->handover_snap = s;
+		up_write(&dup->lock);
 		s->handover_snap = dup;
+
 		/* this new snapshot will accept the handover */
 		s->handover = 1;
-	} else if (is_merge(ti)) {
-		ti->error = "Unable to find snapshot that is to be merged";
-		goto bad_load_and_register;
 	}
 
 	/* Metadata must only be loaded into one table at once */

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 02/17] dm snapshot: add suspended flag to dm_snapshot
  2009-10-20 22:46 ` [PATCH v2 02/17] dm snapshot: add suspended flag to dm_snapshot Mike Snitzer
@ 2009-11-05  0:56   ` Alasdair G Kergon
  2009-11-13 14:48     ` Mike Snitzer
  2009-11-16 18:17     ` Mike Snitzer
  0 siblings, 2 replies; 23+ messages in thread
From: Alasdair G Kergon @ 2009-11-05  0:56 UTC (permalink / raw)
  To: device-mapper development

On Tue, Oct 20, 2009 at 06:46:50PM -0400, Mike Snitzer wrote:
> Allow the snapshot target to be able to verify its state relative to a
> requested operation.  Allows for invalid operations to be prevented,
> e.g. an attempt to hand over an "old" snapshot's exceptions without it
> having been suspended first.
 
OK - same as we do in dm-raid1.

But just be aware that:
        /* This does not get reverted if there's an error later. */
        dm_table_presuspend_targets(map);

and so it's not identical to querying the dm_suspended() property directly.

Alasdair

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 02/17] dm snapshot: add suspended flag to dm_snapshot
  2009-11-05  0:56   ` Alasdair G Kergon
@ 2009-11-13 14:48     ` Mike Snitzer
  2009-11-16 18:17     ` Mike Snitzer
  1 sibling, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2009-11-13 14:48 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: device-mapper development

On Wed, Nov 04 2009 at  7:56pm -0500,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Tue, Oct 20, 2009 at 06:46:50PM -0400, Mike Snitzer wrote:
> > Allow the snapshot target to be able to verify its state relative to a
> > requested operation.  Allows for invalid operations to be prevented,
> > e.g. an attempt to hand over an "old" snapshot's exceptions without it
> > having been suspended first.
>  
> OK - same as we do in dm-raid1.
> 
> But just be aware that:
>         /* This does not get reverted if there's an error later. */
>         dm_table_presuspend_targets(map);
> 
> and so it's not identical to querying the dm_suspended() property directly.

Alasdair,

I think it would be best to drop this patch.  We can accomplish the same
using Mike Anderson's new dm_table_md_suspended:
http://patchwork.kernel.org/patch/59743/

Dropping dm-snapshot-track-suspended-state-in-target.patch requires a
small tweak to
dm-snapshot-allow-live-exception-store-handover-between-tables.patch

The following just illustrates how minimal this change is.  For your
convenience I'll just send v8 of the handover patch that includes this
change (and it will also bump the snapshot target's version now that
dm-snapshot-track-suspended-state-in-target.patch is out of the
picture).
---

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index fce0c39..1543932 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1325,6 +1325,7 @@ static int snapshot_preresume(struct dm_target *ti)
 			up_write(&snap_src->lock);
 			goto normal_snapshot;
 		}
+		up_write(&snap_src->lock);
 		if (s == snap_src) {
 			/* handover source must not resume before destination */
 			DMERR("Unable to handover exceptions on "
@@ -1334,13 +1335,12 @@ static int snapshot_preresume(struct dm_target *ti)
 			r = -EINVAL;
 		} else {
 			/* if handover-destination, source must be suspended */
-			if (!snap_src->suspended) {
+			if (!dm_table_md_suspended(snap_src->ti->table)) {
 				DMERR("Unable to accept exceptions from a "
 				      "snapshot that is not suspended.");
 				r = -EINVAL;
 			}
 		}
-		up_write(&snap_src->lock);
 	}
 
 normal_snapshot:

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 02/17] dm snapshot: add suspended flag to dm_snapshot
  2009-11-05  0:56   ` Alasdair G Kergon
  2009-11-13 14:48     ` Mike Snitzer
@ 2009-11-16 18:17     ` Mike Snitzer
  2009-11-17  2:24       ` Alasdair G Kergon
  1 sibling, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2009-11-16 18:17 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: device-mapper development

On Wed, Nov 04 2009 at  7:56pm -0500,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Tue, Oct 20, 2009 at 06:46:50PM -0400, Mike Snitzer wrote:
> > Allow the snapshot target to be able to verify its state relative to a
> > requested operation.  Allows for invalid operations to be prevented,
> > e.g. an attempt to hand over an "old" snapshot's exceptions without it
> > having been suspended first.
>  
> OK - same as we do in dm-raid1.
> 
> But just be aware that:
>         /* This does not get reverted if there's an error later. */
>         dm_table_presuspend_targets(map);
> 
> and so it's not identical to querying the dm_suspended() property directly.

Right, it is now clear that I should be using postsuspend.  The handover
patch needs to know that another snapshot target has been suspended;
setting dm-snapshot's 'suspended' flag in .presuspend doesn't give us
that information.

So the dm-snapshot-track-suspended-state-in-target.patch that you have
queued should be changed with: s/presuspend/postsuspend/

(the same search and replace is needed in the v7 handover patch)

Mike

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2 02/17] dm snapshot: add suspended flag to dm_snapshot
  2009-11-16 18:17     ` Mike Snitzer
@ 2009-11-17  2:24       ` Alasdair G Kergon
  0 siblings, 0 replies; 23+ messages in thread
From: Alasdair G Kergon @ 2009-11-17  2:24 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development

On Mon, Nov 16, 2009 at 01:17:17PM -0500, Mike Snitzer wrote:
> So the dm-snapshot-track-suspended-state-in-target.patch that you have
> queued should be changed with: s/presuspend/postsuspend/
 
Changed.

As discussed earlier, since we have something that works, we'll leave
the suspended-state tracking as-is for now.  Once all the batches of
patches for the next kernel are signed off, we can return to this and
clean it up in all the targets.

Alasdair

^ permalink raw reply	[flat|nested] 23+ messages in thread

Thread overview: 23+ messages
2009-10-20 22:46 [PATCH v2 00/17] snapshot-merge target Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 01/17] dm snapshot: rework writing to snapshot origin Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 02/17] dm snapshot: add suspended flag to dm_snapshot Mike Snitzer
2009-11-05  0:56   ` Alasdair G Kergon
2009-11-13 14:48     ` Mike Snitzer
2009-11-16 18:17     ` Mike Snitzer
2009-11-17  2:24       ` Alasdair G Kergon
2009-10-20 22:46 ` [PATCH v2 03/17] dm snapshot: initial support for exception handover Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 04/17] dm snapshot: exception handover improvements Mike Snitzer
2009-10-23 15:01   ` Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 05/17] dm exception store: do not read metadata if going to handover exceptions Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 06/17] dm exception store: add snapshot-merge specific methods Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 07/17] dm exception store: snapshot-merge usage accounting Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 08/17] dm snapshot: add snapshot-merge target Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 09/17] dm snapshot: merge target should not allocate new exceptions Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 10/17] dm snapshot: do not allow more than one merging snapshot Mike Snitzer
2009-10-20 22:46 ` [PATCH v2 11/17] dm snapshot: the merge procedure Mike Snitzer
2009-10-20 22:47 ` [PATCH v2 12/17] dm snapshot: queue writes to an area that is actively being merged Mike Snitzer
2009-10-20 22:47 ` [PATCH v2 13/17] dm snapshot: do not merge a chunk until active writes to it finish Mike Snitzer
2009-10-20 22:47 ` [PATCH v2 14/17] dm snapshot: make exceptions in other snapshots when merging Mike Snitzer
2009-10-20 22:47 ` [PATCH v2 15/17] dm snapshot: make exceptions if merge is dispatching to origin Mike Snitzer
2009-10-20 22:47 ` [PATCH v2 16/17] dm snapshot: redirect accesses to origin if merging snap invalidated Mike Snitzer
2009-10-20 22:47 ` [PATCH v2 17/17] dm snapshot: merge a linear region of chunks using one large IO Mike Snitzer
