* [PATCH for-3.13 0/6] dm cache: simple cache block invalidation interface
@ 2013-11-11 17:20 Mike Snitzer
  2013-11-11 17:20 ` [PATCH 1/6] dm cache: cache shrinking support Mike Snitzer
                   ` (5 more replies)
  0 siblings, 6 replies; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 17:20 UTC (permalink / raw)
  To: dm-devel

These patches reflect dm-cache changes that have been staged in
linux-next for the v3.13 merge.  I'm posting them to give a heads-up
that we've chosen to fall back to "plan-b" for supporting cache block
invalidation.  While simpler, this approach eliminates use of the
shim/stack policy interface and the era shim stacked on top of the mq
policy, so it will require near-term follow-on work for a separate
"era" target that sits between the SSD and the cache target -- that
work will land in v3.14.

Joe will elaborate on the overall direction and approach associated
with "plan-b" shortly.  For now, the first step is a more minimal
implementation of cache block invalidation for v3.13.

All patches for v3.13 can be found in the 'for-next' branch of the
linux-dm.git repo, see:
http://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/log/?h=for-next

Joe Thornber (6):
  dm cache: cache shrinking support
  dm cache: add passthrough mode
  dm cache metadata: check the metadata version when reading the superblock
  dm cache policy mq: reduce memory requirements
  dm cache: add remove_cblock method to policy interface
  dm cache: add cache block invalidation support

 Documentation/device-mapper/cache.txt |   31 ++-
 drivers/md/dm-cache-metadata.c        |   95 ++++++-
 drivers/md/dm-cache-metadata.h        |    5 +
 drivers/md/dm-cache-policy-internal.h |    5 +
 drivers/md/dm-cache-policy-mq.c       |  572 +++++++++++++++------------------
 drivers/md/dm-cache-policy.h          |   21 +-
 drivers/md/dm-cache-target.c          |  497 ++++++++++++++++++++++++++---
 7 files changed, 859 insertions(+), 367 deletions(-)

-- 
1.7.4.4

* [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 17:20 [PATCH for-3.13 0/6] dm cache: simple cache block invalidation interface Mike Snitzer
@ 2013-11-11 17:20 ` Mike Snitzer
  2013-11-11 21:19   ` Alasdair G Kergon
  2013-11-11 17:20 ` [PATCH 2/6] dm cache: add passthrough mode Mike Snitzer
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 17:20 UTC (permalink / raw)
  To: dm-devel

From: Joe Thornber <ejt@redhat.com>

Allow a cache to shrink if the blocks being removed from the cache are
not dirty.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-metadata.c |   66 ++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm-cache-target.c   |   63 ++++++++++++++++++++++++++++++++-----
 2 files changed, 120 insertions(+), 9 deletions(-)
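
As a reading aid (not part of the patch itself): the shrink rule is simply
that every cache block in the range being dropped, [new_size, current_size),
must be clean or unmapped.  A minimal user-space sketch of that check, with
a hypothetical is_block_dirty() callback standing in for the metadata
lookup:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Sketch only: refuse a shrink if any cache block that would be
     * dropped still holds dirty (not yet written back) data.
     */
    static bool shrink_allowed(uint64_t current_size, uint64_t new_size,
                               bool (*is_block_dirty)(uint64_t cblock))
    {
            uint64_t cblock;

            for (cblock = new_size; cblock < current_size; cblock++)
                    if (is_block_dirty(cblock))
                            return false;   /* would lose unwritten data */

            return true;    /* all dropped blocks are clean or unmapped */
    }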

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 2262b4e..062b83e 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -667,19 +667,85 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
 	kfree(cmd);
 }
 
+/*
+ * Checks that the given cache block is either unmapped or clean.
+ */
+static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
+				   bool *result)
+{
+	int r;
+	__le64 value;
+	dm_oblock_t ob;
+	unsigned flags;
+
+	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
+	if (r) {
+		DMERR("block_unmapped_or_clean failed");
+		return r;
+	}
+
+	unpack_value(value, &ob, &flags);
+	*result = !((flags & M_VALID) && (flags & M_DIRTY));
+
+	return 0;
+}
+
+static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
+					dm_cblock_t begin, dm_cblock_t end,
+					bool *result)
+{
+	int r;
+	*result = true;
+
+	while (begin != end) {
+		r = block_unmapped_or_clean(cmd, begin, result);
+		if (r)
+			return r;
+
+		if (!*result) {
+			DMERR("cache block %llu is dirty",
+			      (unsigned long long) from_cblock(begin));
+			return 0;
+		}
+
+		begin = to_cblock(from_cblock(begin) + 1);
+	}
+
+	return 0;
+}
+
 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
 {
 	int r;
+	bool clean;
 	__le64 null_mapping = pack_value(0, 0);
 
 	down_write(&cmd->root_lock);
 	__dm_bless_for_disk(&null_mapping);
+
+	if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
+		r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
+		if (r) {
+			__dm_unbless_for_disk(&null_mapping);
+			goto out;
+		}
+
+		if (!clean) {
+			DMERR("unable to shrink cache due to dirty blocks");
+			r = -EINVAL;
+			__dm_unbless_for_disk(&null_mapping);
+			goto out;
+		}
+	}
+
 	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
 			    from_cblock(new_cache_size),
 			    &null_mapping, &cmd->root);
 	if (!r)
 		cmd->cache_blocks = new_cache_size;
 	cmd->changed = true;
+
+out:
 	up_write(&cmd->root_lock);
 
 	return r;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 655994f..183dfc9 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2502,26 +2502,71 @@ static int load_discard(void *context, sector_t discard_block_size,
 	return 0;
 }
 
+static dm_cblock_t get_cache_dev_size(struct cache *cache)
+{
+	sector_t size = get_dev_size(cache->cache_dev);
+	(void) sector_div(size, cache->sectors_per_block);
+	return to_cblock(size);
+}
+
+static bool can_resize(struct cache *cache, dm_cblock_t new_size)
+{
+	if (from_cblock(new_size) > from_cblock(cache->cache_size))
+		return true;
+
+	/*
+	 * We can't drop a dirty block when shrinking the cache.
+	 */
+	while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
+		if (is_dirty(cache, new_size)) {
+			DMERR("unable to shrink cache; cache block %llu is dirty",
+			      (unsigned long long) from_cblock(new_size));
+			return false;
+		}
+		new_size = to_cblock(from_cblock(new_size) + 1);
+	}
+
+	return true;
+}
+
+static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
+{
+	int r;
+
+	r = dm_cache_resize(cache->cmd, new_size);
+	if (r) {
+		DMERR("could not resize cache metadata");
+		return r;
+	}
+
+	cache->cache_size = new_size;
+
+	return 0;
+}
+
 static int cache_preresume(struct dm_target *ti)
 {
 	int r = 0;
 	struct cache *cache = ti->private;
-	sector_t actual_cache_size = get_dev_size(cache->cache_dev);
-	(void) sector_div(actual_cache_size, cache->sectors_per_block);
+	dm_cblock_t csize = get_cache_dev_size(cache);
 
 	/*
 	 * Check to see if the cache has resized.
 	 */
-	if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) {
-		cache->cache_size = to_cblock(actual_cache_size);
-
-		r = dm_cache_resize(cache->cmd, cache->cache_size);
-		if (r) {
-			DMERR("could not resize cache metadata");
+	if (!cache->sized) {
+		r = resize_cache_dev(cache, csize);
+		if (r)
 			return r;
-		}
 
 		cache->sized = true;
+
+	} else if (csize != cache->cache_size) {
+		if (!can_resize(cache, csize))
+			return -EINVAL;
+
+		r = resize_cache_dev(cache, csize);
+		if (r)
+			return r;
 	}
 
 	if (!cache->loaded_mappings) {
-- 
1.7.4.4

* [PATCH 2/6] dm cache: add passthrough mode
  2013-11-11 17:20 [PATCH for-3.13 0/6] dm cache: simple cache block invalidation interface Mike Snitzer
  2013-11-11 17:20 ` [PATCH 1/6] dm cache: cache shrinking support Mike Snitzer
@ 2013-11-11 17:20 ` Mike Snitzer
  2013-11-11 23:40   ` Alasdair G Kergon
  2013-11-11 17:20 ` [PATCH 3/6] dm cache metadata: check the metadata version when reading the superblock Mike Snitzer
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 17:20 UTC (permalink / raw)
  To: dm-devel

From: Joe Thornber <ejt@redhat.com>

"Passthrough" is a dm-cache operating mode (like writethrough or
writeback) which is intended to be used when the cache contents are not
known to be coherent with the origin device.  It behaves as follows:

* All reads are served from the origin device (all reads miss the cache)
* All writes are forwarded to the origin device; additionally, write
  hits cause cache block invalidates

This mode decouples cache coherency checks from cache device creation,
largely to avoid having to perform coherency checks while booting.  Boot
scripts can create cache devices in passthrough mode and put them into
service (mount cached filesystems, for example) without having to worry
about coherency.  Coherency that exists is maintained, although the
cache will gradually cool as writes take place.

Later, applications can perform coherency checks, the nature of which
will depend on the type of the underlying storage.  If coherency can be
verified, the cache device can be transitioned to writethrough or
writeback mode while still warm; otherwise, the cache contents can be
discarded prior to transitioning to the desired operating mode.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Morgan Mears <Morgan.Mears@netapp.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 Documentation/device-mapper/cache.txt |   19 +++-
 drivers/md/dm-cache-metadata.c        |    5 +
 drivers/md/dm-cache-metadata.h        |    5 +
 drivers/md/dm-cache-target.c          |  209 +++++++++++++++++++++++++++------
 4 files changed, 200 insertions(+), 38 deletions(-)
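
The per-bio decision described above is small enough to sketch standalone
(illustration only, not the patch's code; the two function pointers are
hypothetical stand-ins for the target's invalidate and remap paths):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Passthrough routing in a nutshell: every bio is remapped to the
     * origin; a write that hits a cached block additionally invalidates
     * that cache block so a stale copy can never be served later.
     */
    static void passthrough_bio(bool is_write, bool cache_hit, uint64_t cblock,
                                void (*invalidate_cblock)(uint64_t),
                                void (*remap_to_origin)(void))
    {
            if (is_write && cache_hit)
                    invalidate_cblock(cblock);

            remap_to_origin();
    }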

diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
index 33d45ee..ff6639f 100644
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -68,10 +68,11 @@ So large block sizes are bad because they waste cache space.  And small
 block sizes are bad because they increase the amount of metadata (both
 in core and on disk).
 
-Writeback/writethrough
-----------------------
+Cache operating modes
+---------------------
 
-The cache has two modes, writeback and writethrough.
+The cache has three operating modes: writeback, writethrough and
+passthrough.
 
 If writeback, the default, is selected then a write to a block that is
 cached will go only to the cache and the block will be marked dirty in
@@ -81,6 +82,18 @@ If writethrough is selected then a write to a cached block will not
 complete until it has hit both the origin and cache devices.  Clean
 blocks should remain clean.
 
+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates.  Passthrough mode allows a cache device to be
+activated without having to worry about coherency.  Coherency that
+exists is maintained, although the cache will gradually cool as writes
+take place.  If the coherency of the cache can later be verified, or
+established, the cache device can be transitioned to writethrough or
+writeback mode while still warm.  Otherwise, the cache contents can be
+discarded prior to transitioning to the desired operating mode.
+
 A simple cleaner policy is provided, which will clean (write back) all
 dirty blocks in a cache.  Useful for decommissioning a cache.
 
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 062b83e..8601425 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1249,3 +1249,8 @@ int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
 
 	return r;
 }
+
+int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
+{
+	return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
+}
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index f45cef2..cd906f1 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -137,6 +137,11 @@ int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *
 int dm_cache_save_hint(struct dm_cache_metadata *cmd,
 		       dm_cblock_t cblock, uint32_t hint);
 
+/*
+ * Query method.  Are all the blocks in the cache clean?
+ */
+int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result);
+
 /*----------------------------------------------------------------*/
 
 #endif /* DM_CACHE_METADATA_H */
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 183dfc9..8c02177 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -104,14 +104,37 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
 /*
  * FIXME: the cache is read/write for the time being.
  */
-enum cache_mode {
+enum cache_metadata_mode {
 	CM_WRITE,		/* metadata may be changed */
 	CM_READ_ONLY,		/* metadata may not be changed */
 };
 
+enum cache_io_mode {
+	/*
+	 * Data is written to cached blocks only.  These blocks are marked
+	 * dirty.  If you lose the cache device you will lose data.
+	 * Potential performance increase for both reads and writes.
+	 */
+	CM_IO_WRITEBACK,
+
+	/*
+	 * Data is written to both cache and origin.  Blocks are never
+	 * dirty.  Potential performance benefit for reads only.
+	 */
+	CM_IO_WRITETHROUGH,
+
+	/*
+	 * A degraded mode useful for various cache coherency situations
+	 * (eg, rolling back snapshots).  Reads and writes always go to the
+	 * origin.  If a write goes to a cached oblock, then the cache
+	 * block is invalidated.
+	 */
+	CM_IO_PASSTHROUGH
+};
+
 struct cache_features {
-	enum cache_mode mode;
-	bool write_through:1;
+	enum cache_metadata_mode mode;
+	enum cache_io_mode io_mode;
 };
 
 struct cache_stats {
@@ -565,9 +588,24 @@ static void save_stats(struct cache *cache)
 #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
 #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
 
+static bool writethrough_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITETHROUGH;
+}
+
+static bool writeback_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITEBACK;
+}
+
+static bool passthrough_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_PASSTHROUGH;
+}
+
 static size_t get_per_bio_data_size(struct cache *cache)
 {
-	return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
 }
 
 static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
@@ -1135,6 +1173,32 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
 	quiesce_migration(mg);
 }
 
+/*
+ * Invalidate a cache entry.  No writeback occurs; any changes in the cache
+ * block are thrown away.
+ */
+static void invalidate(struct cache *cache, struct prealloc *structs,
+		       dm_oblock_t oblock, dm_cblock_t cblock,
+		       struct dm_bio_prison_cell *cell)
+{
+	struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+	mg->err = false;
+	mg->writeback = false;
+	mg->demote = true;
+	mg->promote = false;
+	mg->requeue_holder = true;
+	mg->cache = cache;
+	mg->old_oblock = oblock;
+	mg->cblock = cblock;
+	mg->old_ocell = cell;
+	mg->new_ocell = NULL;
+	mg->start_jiffies = jiffies;
+
+	inc_nr_migrations(cache);
+	quiesce_migration(mg);
+}
+
 /*----------------------------------------------------------------
  * bio processing
  *--------------------------------------------------------------*/
@@ -1197,13 +1261,6 @@ static bool spare_migration_bandwidth(struct cache *cache)
 	return current_volume < cache->migration_threshold;
 }
 
-static bool is_writethrough_io(struct cache *cache, struct bio *bio,
-			       dm_cblock_t cblock)
-{
-	return bio_data_dir(bio) == WRITE &&
-		cache->features.write_through && !is_dirty(cache, cblock);
-}
-
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
 {
 	atomic_inc(bio_data_dir(bio) == READ ?
@@ -1216,6 +1273,15 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
 		   &cache->stats.read_miss : &cache->stats.write_miss);
 }
 
+static void issue_cache_bio(struct cache *cache, struct bio *bio,
+			    struct per_bio_data *pb,
+			    dm_oblock_t oblock, dm_cblock_t cblock)
+{
+	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+	remap_to_cache_dirty(cache, bio, oblock, cblock);
+	issue(cache, bio);
+}
+
 static void process_bio(struct cache *cache, struct prealloc *structs,
 			struct bio *bio)
 {
@@ -1227,7 +1293,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 	bool discarded_block = is_discarded_oblock(cache, block);
-	bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
+	bool passthrough = passthrough_mode(&cache->features);
+	bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
 
 	/*
 	 * Check to see if that block is currently migrating.
@@ -1248,15 +1315,39 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 
 	switch (lookup_result.op) {
 	case POLICY_HIT:
-		inc_hit_counter(cache, bio);
-		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+		if (passthrough) {
+			inc_miss_counter(cache, bio);
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock))
-			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-		else
-			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+			/*
+			 * Passthrough always maps to the origin,
+			 * invalidating any cache blocks that are written
+			 * to.
+			 */
+
+			if (bio_data_dir(bio) == WRITE) {
+				atomic_inc(&cache->stats.demotion);
+				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
+				release_cell = false;
+
+			} else {
+				/* FIXME: factor out issue_origin() */
+				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+				remap_to_origin_clear_discard(cache, bio, block);
+				issue(cache, bio);
+			}
+		} else {
+			inc_hit_counter(cache, bio);
+
+			if (bio_data_dir(bio) == WRITE &&
+			    writethrough_mode(&cache->features) &&
+			    !is_dirty(cache, lookup_result.cblock)) {
+				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+				issue(cache, bio);
+			} else
+				issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+		}
 
-		issue(cache, bio);
 		break;
 
 	case POLICY_MISS:
@@ -1807,7 +1898,7 @@ static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
 static void init_features(struct cache_features *cf)
 {
 	cf->mode = CM_WRITE;
-	cf->write_through = false;
+	cf->io_mode = CM_IO_WRITEBACK;
 }
 
 static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
@@ -1832,10 +1923,13 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
 		arg = dm_shift_arg(as);
 
 		if (!strcasecmp(arg, "writeback"))
-			cf->write_through = false;
+			cf->io_mode = CM_IO_WRITEBACK;
 
 		else if (!strcasecmp(arg, "writethrough"))
-			cf->write_through = true;
+			cf->io_mode = CM_IO_WRITETHROUGH;
+
+		else if (!strcasecmp(arg, "passthrough"))
+			cf->io_mode = CM_IO_PASSTHROUGH;
 
 		else {
 			*error = "Unrecognised cache feature requested";
@@ -2088,6 +2182,22 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	}
 	cache->cmd = cmd;
 
+	if (passthrough_mode(&cache->features)) {
+		bool all_clean;
+
+		r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
+		if (r) {
+			*error = "dm_cache_metadata_all_clean() failed";
+			goto bad;
+		}
+
+		if (!all_clean) {
+			*error = "Cannot enter passthrough mode unless all blocks are clean";
+			r = -EINVAL;
+			goto bad;
+		}
+	}
+
 	spin_lock_init(&cache->lock);
 	bio_list_init(&cache->deferred_bios);
 	bio_list_init(&cache->deferred_flush_bios);
@@ -2303,17 +2413,37 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_SUBMITTED;
 	}
 
+	r = DM_MAPIO_REMAPPED;
 	switch (lookup_result.op) {
 	case POLICY_HIT:
-		inc_hit_counter(cache, bio);
-		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+		if (passthrough_mode(&cache->features)) {
+			if (bio_data_dir(bio) == WRITE) {
+				/*
+				 * We need to invalidate this block, so
+				 * defer for the worker thread.
+				 */
+				cell_defer(cache, cell, true);
+				r = DM_MAPIO_SUBMITTED;
+
+			} else {
+				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+				inc_miss_counter(cache, bio);
+				remap_to_origin_clear_discard(cache, bio, block);
+
+				cell_defer(cache, cell, false);
+			}
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock))
-			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-		else
-			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+		} else {
+			inc_hit_counter(cache, bio);
+
+			if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
+			    !is_dirty(cache, lookup_result.cblock))
+				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+			else
+				remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
 
-		cell_defer(cache, cell, false);
+			cell_defer(cache, cell, false);
+		}
 		break;
 
 	case POLICY_MISS:
@@ -2338,10 +2468,10 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
 			    (unsigned) lookup_result.op);
 		bio_io_error(bio);
-		return DM_MAPIO_SUBMITTED;
+		r = DM_MAPIO_SUBMITTED;
 	}
 
-	return DM_MAPIO_REMAPPED;
+	return r;
 }
 
 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
@@ -2659,10 +2789,19 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 		       (unsigned long long) from_cblock(residency),
 		       cache->nr_dirty);
 
-		if (cache->features.write_through)
+		if (writethrough_mode(&cache->features))
 			DMEMIT("1 writethrough ");
-		else
-			DMEMIT("0 ");
+
+		else if (passthrough_mode(&cache->features))
+			DMEMIT("1 passthrough ");
+
+		else if (writeback_mode(&cache->features))
+			DMEMIT("1 writeback ");
+
+		else {
+			DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
+			goto err;
+		}
 
 		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
 		if (sz < maxlen) {
@@ -2771,7 +2910,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {1, 1, 1},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,
-- 
1.7.4.4

* [PATCH 3/6] dm cache metadata: check the metadata version when reading the superblock
  2013-11-11 17:20 [PATCH for-3.13 0/6] dm cache: simple cache block invalidation interface Mike Snitzer
  2013-11-11 17:20 ` [PATCH 1/6] dm cache: cache shrinking support Mike Snitzer
  2013-11-11 17:20 ` [PATCH 2/6] dm cache: add passthrough mode Mike Snitzer
@ 2013-11-11 17:20 ` Mike Snitzer
  2013-11-11 17:20 ` [PATCH 4/6] dm cache policy mq: reduce memory requirements Mike Snitzer
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 17:20 UTC (permalink / raw)
  To: dm-devel

From: Joe Thornber <ejt@redhat.com>

Check the metadata version when reading the superblock to verify that the
on-disk metadata is in a format this module supports.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-metadata.c |   24 +++++++++++++++++++++---
 1 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 8601425..9ef0752 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -20,7 +20,13 @@
 
 #define CACHE_SUPERBLOCK_MAGIC 06142003
 #define CACHE_SUPERBLOCK_LOCATION 0
-#define CACHE_VERSION 1
+
+/*
+ * defines a range of metadata versions that this module can handle.
+ */
+#define MIN_CACHE_VERSION 1
+#define MAX_CACHE_VERSION 1
+
 #define CACHE_METADATA_CACHE_SIZE 64
 
 /*
@@ -134,6 +140,18 @@ static void sb_prepare_for_write(struct dm_block_validator *v,
 						      SUPERBLOCK_CSUM_XOR));
 }
 
+static int check_metadata_version(struct cache_disk_superblock *disk_super)
+{
+	uint32_t metadata_version = le32_to_cpu(disk_super->version);
+	if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
+		DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
+		      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int sb_check(struct dm_block_validator *v,
 		    struct dm_block *b,
 		    size_t sb_block_size)
@@ -164,7 +182,7 @@ static int sb_check(struct dm_block_validator *v,
 		return -EILSEQ;
 	}
 
-	return 0;
+	return check_metadata_version(disk_super);
 }
 
 static struct dm_block_validator sb_validator = {
@@ -270,7 +288,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 	disk_super->flags = 0;
 	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
 	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
-	disk_super->version = cpu_to_le32(CACHE_VERSION);
+	disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
 	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
 	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
 	disk_super->policy_hint_size = 0;
-- 
1.7.4.4

* [PATCH 4/6] dm cache policy mq: reduce memory requirements
  2013-11-11 17:20 [PATCH for-3.13 0/6] dm cache: simple cache block invalidation interface Mike Snitzer
                   ` (2 preceding siblings ...)
  2013-11-11 17:20 ` [PATCH 3/6] dm cache metadata: check the metadata version when reading the superblock Mike Snitzer
@ 2013-11-11 17:20 ` Mike Snitzer
  2013-11-11 17:20 ` [PATCH 5/6] dm cache: add remove_cblock method to policy interface Mike Snitzer
  2013-11-11 17:20 ` [PATCH 6/6] dm cache: add cache block invalidation support Mike Snitzer
  5 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 17:20 UTC (permalink / raw)
  To: dm-devel

From: Joe Thornber <ejt@redhat.com>

Rather than storing the cblock in each cache entry, we allocate all
entries in an array and infer the cblock from the entry position.

Saves 4 bytes of memory per cache block.  In addition, this gives us an
easy way of looking up cache entries by cblock.

We no longer need to keep an explicit bitset to track which cblocks
have been allocated.  And no searching is needed to find free cblocks.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-policy-mq.c |  543 +++++++++++++++++----------------------
 1 files changed, 231 insertions(+), 312 deletions(-)
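
The memory saving comes from making the cblock implicit: an entry's index
in one big array is its cblock.  A minimal user-space sketch of that idea
(struct fields are trimmed down; infer_cblock() here mirrors the function
of the same name in the patch, the other names are illustrative):

    #include <stddef.h>
    #include <stdint.h>

    struct entry {                  /* note: no cblock field */
            uint64_t oblock;
            unsigned hit_count;
    };

    struct entry_array {
            struct entry *entries;  /* entries[cblock] describes cblock */
            size_t nr;
    };

    /* The cblock is recovered from the entry's position ... */
    static size_t infer_cblock(const struct entry_array *a, const struct entry *e)
    {
            return e - a->entries;
    }

    /* ... and finding the entry for a cblock is a plain array index. */
    static struct entry *entry_for_cblock(struct entry_array *a, size_t cblock)
    {
            return cblock < a->nr ? &a->entries[cblock] : NULL;
    }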

diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 444f0bf..782bf85 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -26,19 +26,6 @@ static unsigned next_power(unsigned n, unsigned min)
 
 /*----------------------------------------------------------------*/
 
-static unsigned long *alloc_bitset(unsigned nr_entries)
-{
-	size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
-	return vzalloc(s);
-}
-
-static void free_bitset(unsigned long *bits)
-{
-	vfree(bits);
-}
-
-/*----------------------------------------------------------------*/
-
 /*
  * Large, sequential ios are probably better left on the origin device since
  * spindles tend to have good bandwidth.
@@ -233,18 +220,107 @@ struct entry {
 	struct hlist_node hlist;
 	struct list_head list;
 	dm_oblock_t oblock;
-	dm_cblock_t cblock;	/* valid iff in_cache */
 
 	/*
 	 * FIXME: pack these better
 	 */
-	bool in_cache:1;
 	bool dirty:1;
 	unsigned hit_count;
 	unsigned generation;
 	unsigned tick;
 };
 
+/*
+ * Rather than storing the cblock in an entry, we allocate all entries in
+ * an array, and infer the cblock from the entry position.
+ *
+ * Free entries are linked together into a list.
+ */
+struct entry_pool {
+	struct entry *entries, *entries_end;
+	struct list_head free;
+	unsigned nr_allocated;
+};
+
+static int epool_init(struct entry_pool *ep, unsigned nr_entries)
+{
+	unsigned i;
+
+	ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
+	if (!ep->entries)
+		return -ENOMEM;
+
+	ep->entries_end = ep->entries + nr_entries;
+
+	INIT_LIST_HEAD(&ep->free);
+	for (i = 0; i < nr_entries; i++)
+		list_add(&ep->entries[i].list, &ep->free);
+
+	ep->nr_allocated = 0;
+
+	return 0;
+}
+
+static void epool_exit(struct entry_pool *ep)
+{
+	vfree(ep->entries);
+}
+
+static struct entry *alloc_entry(struct entry_pool *ep)
+{
+	struct entry *e;
+
+	if (list_empty(&ep->free))
+		return NULL;
+
+	e = list_entry(list_pop(&ep->free), struct entry, list);
+	INIT_LIST_HEAD(&e->list);
+	INIT_HLIST_NODE(&e->hlist);
+	ep->nr_allocated++;
+
+	return e;
+}
+
+/*
+ * This assumes the cblock hasn't already been allocated.
+ */
+static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock)
+{
+	struct entry *e = ep->entries + from_cblock(cblock);
+	list_del(&e->list);
+
+	INIT_LIST_HEAD(&e->list);
+	INIT_HLIST_NODE(&e->hlist);
+	ep->nr_allocated++;
+
+	return e;
+}
+
+static void free_entry(struct entry_pool *ep, struct entry *e)
+{
+	BUG_ON(!ep->nr_allocated);
+	ep->nr_allocated--;
+	INIT_HLIST_NODE(&e->hlist);
+	list_add(&e->list, &ep->free);
+}
+
+static bool epool_empty(struct entry_pool *ep)
+{
+	return list_empty(&ep->free);
+}
+
+static bool in_pool(struct entry_pool *ep, struct entry *e)
+{
+	return e >= ep->entries && e < ep->entries_end;
+}
+
+static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e)
+{
+	return to_cblock(e - ep->entries);
+}
+
+/*----------------------------------------------------------------*/
+
 struct mq_policy {
 	struct dm_cache_policy policy;
 
@@ -254,6 +330,13 @@ struct mq_policy {
 	struct io_tracker tracker;
 
 	/*
+	 * Entries come from two pools, one of pre-cache entries, and one
+	 * for the cache proper.
+	 */
+	struct entry_pool pre_cache_pool;
+	struct entry_pool cache_pool;
+
+	/*
 	 * We maintain three queues of entries.  The cache proper,
 	 * consisting of a clean and dirty queue, contains the currently
 	 * active mappings.  Whereas the pre_cache tracks blocks that
@@ -300,25 +383,6 @@ struct mq_policy {
 	unsigned promote_threshold;
 
 	/*
-	 * We need cache_size entries for the cache, and choose to have
-	 * cache_size entries for the pre_cache too.  One motivation for
-	 * using the same size is to make the hit counts directly
-	 * comparable between pre_cache and cache.
-	 */
-	unsigned nr_entries;
-	unsigned nr_entries_allocated;
-	struct list_head free;
-
-	/*
-	 * Cache blocks may be unallocated.  We store this info in a
-	 * bitset.
-	 */
-	unsigned long *allocation_bitset;
-	unsigned nr_cblocks_allocated;
-	unsigned find_free_nr_words;
-	unsigned find_free_last_word;
-
-	/*
 	 * The hash table allows us to quickly find an entry by origin
 	 * block.  Both pre_cache and cache entries are in here.
 	 */
@@ -328,50 +392,6 @@ struct mq_policy {
 };
 
 /*----------------------------------------------------------------*/
-/* Free/alloc mq cache entry structures. */
-static void concat_queue(struct list_head *lh, struct queue *q)
-{
-	unsigned level;
-
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_splice(q->qs + level, lh);
-}
-
-static void free_entries(struct mq_policy *mq)
-{
-	struct entry *e, *tmp;
-
-	concat_queue(&mq->free, &mq->pre_cache);
-	concat_queue(&mq->free, &mq->cache_clean);
-	concat_queue(&mq->free, &mq->cache_dirty);
-
-	list_for_each_entry_safe(e, tmp, &mq->free, list)
-		kmem_cache_free(mq_entry_cache, e);
-}
-
-static int alloc_entries(struct mq_policy *mq, unsigned elts)
-{
-	unsigned u = mq->nr_entries;
-
-	INIT_LIST_HEAD(&mq->free);
-	mq->nr_entries_allocated = 0;
-
-	while (u--) {
-		struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL);
-
-		if (!e) {
-			free_entries(mq);
-			return -ENOMEM;
-		}
-
-
-		list_add(&e->list, &mq->free);
-	}
-
-	return 0;
-}
-
-/*----------------------------------------------------------------*/
 
 /*
  * Simple hash table implementation.  Should replace with the standard hash
@@ -407,54 +427,9 @@ static void hash_remove(struct entry *e)
 
 /*----------------------------------------------------------------*/
 
-/*
- * Allocates a new entry structure.  The memory is allocated in one lump,
- * so we just handing it out here.  Returns NULL if all entries have
- * already been allocated.  Cannot fail otherwise.
- */
-static struct entry *alloc_entry(struct mq_policy *mq)
-{
-	struct entry *e;
-
-	if (mq->nr_entries_allocated >= mq->nr_entries) {
-		BUG_ON(!list_empty(&mq->free));
-		return NULL;
-	}
-
-	e = list_entry(list_pop(&mq->free), struct entry, list);
-	INIT_LIST_HEAD(&e->list);
-	INIT_HLIST_NODE(&e->hlist);
-
-	mq->nr_entries_allocated++;
-	return e;
-}
-
-/*----------------------------------------------------------------*/
-
-/*
- * Mark cache blocks allocated or not in the bitset.
- */
-static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock)
-{
-	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
-	BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset));
-
-	set_bit(from_cblock(cblock), mq->allocation_bitset);
-	mq->nr_cblocks_allocated++;
-}
-
-static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock)
-{
-	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
-	BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset));
-
-	clear_bit(from_cblock(cblock), mq->allocation_bitset);
-	mq->nr_cblocks_allocated--;
-}
-
 static bool any_free_cblocks(struct mq_policy *mq)
 {
-	return mq->nr_cblocks_allocated < from_cblock(mq->cache_size);
+	return !epool_empty(&mq->cache_pool);
 }
 
 static bool any_clean_cblocks(struct mq_policy *mq)
@@ -462,48 +437,6 @@ static bool any_clean_cblocks(struct mq_policy *mq)
 	return !queue_empty(&mq->cache_clean);
 }
 
-/*
- * Fills result out with a cache block that isn't in use, or return
- * -ENOSPC.  This does _not_ mark the cblock as allocated, the caller is
- * reponsible for that.
- */
-static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end,
-			      dm_cblock_t *result, unsigned *last_word)
-{
-	int r = -ENOSPC;
-	unsigned w;
-
-	for (w = begin; w < end; w++) {
-		/*
-		 * ffz is undefined if no zero exists
-		 */
-		if (mq->allocation_bitset[w] != ~0UL) {
-			*last_word = w;
-			*result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w]));
-			if (from_cblock(*result) < from_cblock(mq->cache_size))
-				r = 0;
-
-			break;
-		}
-	}
-
-	return r;
-}
-
-static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result)
-{
-	int r;
-
-	if (!any_free_cblocks(mq))
-		return -ENOSPC;
-
-	r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word);
-	if (r == -ENOSPC && mq->find_free_last_word)
-		r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word);
-
-	return r;
-}
-
 /*----------------------------------------------------------------*/
 
 /*
@@ -520,34 +453,35 @@ static unsigned queue_level(struct entry *e)
 	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
 }
 
+static bool in_cache(struct mq_policy *mq, struct entry *e)
+{
+	return in_pool(&mq->cache_pool, e);
+}
+
 /*
  * Inserts the entry into the pre_cache or the cache.  Ensures the cache
- * block is marked as allocated if necc.  Inserts into the hash table.  Sets the
- * tick which records when the entry was last moved about.
+ * block is marked as allocated if necc.  Inserts into the hash table.
+ * Sets the tick which records when the entry was last moved about.
  */
 static void push(struct mq_policy *mq, struct entry *e)
 {
 	e->tick = mq->tick;
 	hash_insert(mq, e);
 
-	if (e->in_cache) {
-		alloc_cblock(mq, e->cblock);
+	if (in_cache(mq, e))
 		queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
 			   queue_level(e), &e->list);
-	} else
+	else
 		queue_push(&mq->pre_cache, queue_level(e), &e->list);
 }
 
 /*
  * Removes an entry from pre_cache or cache.  Removes from the hash table.
- * Frees off the cache block if necc.
  */
 static void del(struct mq_policy *mq, struct entry *e)
 {
 	queue_remove(&e->list);
 	hash_remove(e);
-	if (e->in_cache)
-		free_cblock(mq, e->cblock);
 }
 
 /*
@@ -564,8 +498,6 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q)
 
 	e = container_of(h, struct entry, list);
 	hash_remove(e);
-	if (e->in_cache)
-		free_cblock(mq, e->cblock);
 
 	return e;
 }
@@ -599,9 +531,7 @@ static void check_generation(struct mq_policy *mq)
 	struct list_head *head;
 	struct entry *e;
 
-	if ((mq->hit_count >= mq->generation_period) &&
-	    (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) {
-
+	if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
 		mq->hit_count = 0;
 		mq->generation++;
 
@@ -668,7 +598,7 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
  * - set the hit count to a hard coded value other than 1, eg, is it better
  *   if it goes in at level 2?
  */
-static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t *cblock)
+static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
 {
 	struct entry *demoted = pop(mq, &mq->cache_clean);
 
@@ -682,12 +612,14 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t
 		 */
 		return -ENOSPC;
 
-	*cblock = demoted->cblock;
 	*oblock = demoted->oblock;
-	demoted->in_cache = false;
-	demoted->dirty = false;
-	demoted->hit_count = 1;
-	push(mq, demoted);
+	free_entry(&mq->cache_pool, demoted);
+
+	/*
+	 * We used to put the demoted block into the pre-cache, but I think
+	 * it's simpler to just let it work its way up from zero again.
+	 * Stops blocks flickering in and out of the cache.
+	 */
 
 	return 0;
 }
@@ -735,9 +667,9 @@ static int cache_entry_found(struct mq_policy *mq,
 {
 	requeue_and_update_tick(mq, e);
 
-	if (e->in_cache) {
+	if (in_cache(mq, e)) {
 		result->op = POLICY_HIT;
-		result->cblock = e->cblock;
+		result->cblock = infer_cblock(&mq->cache_pool, e);
 	}
 
 	return 0;
@@ -751,11 +683,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 			      struct policy_result *result)
 {
 	int r;
-	dm_cblock_t cblock;
+	struct entry *new_e;
 
-	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
+	/* Ensure there's a free cblock in the cache */
+	if (epool_empty(&mq->cache_pool)) {
 		result->op = POLICY_REPLACE;
-		r = demote_cblock(mq, &result->old_oblock, &cblock);
+		r = demote_cblock(mq, &result->old_oblock);
 		if (r) {
 			result->op = POLICY_MISS;
 			return 0;
@@ -763,12 +696,20 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 	} else
 		result->op = POLICY_NEW;
 
-	result->cblock = e->cblock = cblock;
+	new_e = alloc_entry(&mq->cache_pool);
+	BUG_ON(!new_e);
+
+	new_e->oblock = e->oblock;
+	new_e->dirty = false;
+	new_e->hit_count = e->hit_count;
+	new_e->generation = e->generation;
+	new_e->tick = e->tick;
 
 	del(mq, e);
-	e->in_cache = true;
-	e->dirty = false;
-	push(mq, e);
+	free_entry(&mq->pre_cache_pool, e);
+	push(mq, new_e);
+
+	result->cblock = infer_cblock(&mq->cache_pool, new_e);
 
 	return 0;
 }
@@ -793,21 +734,10 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
 	return r;
 }
 
-static void insert_entry_in_pre_cache(struct mq_policy *mq,
-				      struct entry *e, dm_oblock_t oblock)
-{
-	e->in_cache = false;
-	e->dirty = false;
-	e->oblock = oblock;
-	e->hit_count = 1;
-	e->generation = mq->generation;
-	push(mq, e);
-}
-
 static void insert_in_pre_cache(struct mq_policy *mq,
 				dm_oblock_t oblock)
 {
-	struct entry *e = alloc_entry(mq);
+	struct entry *e = alloc_entry(&mq->pre_cache_pool);
 
 	if (!e)
 		/*
@@ -821,7 +751,11 @@ static void insert_in_pre_cache(struct mq_policy *mq,
 		return;
 	}
 
-	insert_entry_in_pre_cache(mq, e, oblock);
+	e->dirty = false;
+	e->oblock = oblock;
+	e->hit_count = 1;
+	e->generation = mq->generation;
+	push(mq, e);
 }
 
 static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
@@ -829,10 +763,10 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 {
 	int r;
 	struct entry *e;
-	dm_cblock_t cblock;
 
-	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
-		r = demote_cblock(mq, &result->old_oblock, &cblock);
+	if (epool_empty(&mq->cache_pool)) {
+		result->op = POLICY_REPLACE;
+		r = demote_cblock(mq, &result->old_oblock);
 		if (unlikely(r)) {
 			result->op = POLICY_MISS;
 			insert_in_pre_cache(mq, oblock);
@@ -842,31 +776,21 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 		/*
 		 * This will always succeed, since we've just demoted.
 		 */
-		e = pop(mq, &mq->pre_cache);
-		result->op = POLICY_REPLACE;
+		e = alloc_entry(&mq->cache_pool);
+		BUG_ON(!e);
 
 	} else {
-		e = alloc_entry(mq);
-		if (unlikely(!e))
-			e = pop(mq, &mq->pre_cache);
-
-		if (unlikely(!e)) {
-			result->op = POLICY_MISS;
-			return;
-		}
-
+		e = alloc_entry(&mq->cache_pool);
 		result->op = POLICY_NEW;
 	}
 
 	e->oblock = oblock;
-	e->cblock = cblock;
-	e->in_cache = true;
 	e->dirty = false;
 	e->hit_count = 1;
 	e->generation = mq->generation;
 	push(mq, e);
 
-	result->cblock = e->cblock;
+	result->cblock = infer_cblock(&mq->cache_pool, e);
 }
 
 static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
@@ -897,13 +821,16 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock,
 	int r = 0;
 	struct entry *e = hash_lookup(mq, oblock);
 
-	if (e && e->in_cache)
+	if (e && in_cache(mq, e))
 		r = cache_entry_found(mq, e, result);
+
 	else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
 		result->op = POLICY_MISS;
+
 	else if (e)
 		r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
 					  data_dir, result);
+
 	else
 		r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
 				   data_dir, result);
@@ -930,9 +857,9 @@ static void mq_destroy(struct dm_cache_policy *p)
 {
 	struct mq_policy *mq = to_mq_policy(p);
 
-	free_bitset(mq->allocation_bitset);
 	kfree(mq->table);
-	free_entries(mq);
+	epool_exit(&mq->cache_pool);
+	epool_exit(&mq->pre_cache_pool);
 	kfree(mq);
 }
 
@@ -980,8 +907,8 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 		return -EWOULDBLOCK;
 
 	e = hash_lookup(mq, oblock);
-	if (e && e->in_cache) {
-		*cblock = e->cblock;
+	if (e && in_cache(mq, e)) {
+		*cblock = infer_cblock(&mq->cache_pool, e);
 		r = 0;
 	} else
 		r = -ENOENT;
@@ -991,38 +918,34 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 	return r;
 }
 
-/*
- * FIXME: __mq_set_clear_dirty can block due to mutex.
- * Ideally a policy should not block in functions called
- * from the map() function.  Explore using RCU.
- */
-static void __mq_set_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool set)
+static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
 {
-	struct mq_policy *mq = to_mq_policy(p);
 	struct entry *e;
 
-	mutex_lock(&mq->lock);
 	e = hash_lookup(mq, oblock);
-	if (!e)
-		DMWARN("__mq_set_clear_dirty called for a block that isn't in the cache");
-	else {
-		BUG_ON(!e->in_cache);
+	BUG_ON(!e || !in_cache(mq, e));
 
-		del(mq, e);
-		e->dirty = set;
-		push(mq, e);
-	}
-	mutex_unlock(&mq->lock);
+	del(mq, e);
+	e->dirty = set;
+	push(mq, e);
 }
 
 static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
-	__mq_set_clear_dirty(p, oblock, true);
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	__mq_set_clear_dirty(mq, oblock, true);
+	mutex_unlock(&mq->lock);
 }
 
 static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
-	__mq_set_clear_dirty(p, oblock, false);
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	__mq_set_clear_dirty(mq, oblock, false);
+	mutex_unlock(&mq->lock);
 }
 
 static int mq_load_mapping(struct dm_cache_policy *p,
@@ -1032,13 +955,8 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	struct mq_policy *mq = to_mq_policy(p);
 	struct entry *e;
 
-	e = alloc_entry(mq);
-	if (!e)
-		return -ENOMEM;
-
-	e->cblock = cblock;
+	e = alloc_particular_entry(&mq->cache_pool, cblock);
 	e->oblock = oblock;
-	e->in_cache = true;
 	e->dirty = false;	/* this gets corrected in a minute */
 	e->hit_count = hint_valid ? hint : 1;
 	e->generation = mq->generation;
@@ -1047,52 +965,58 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	return 0;
 }
 
+static int mq_save_hints(struct mq_policy *mq, struct queue *q,
+			 policy_walk_fn fn, void *context)
+{
+	int r;
+	unsigned level;
+	struct entry *e;
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		list_for_each_entry(e, q->qs + level, list) {
+			r = fn(context, infer_cblock(&mq->cache_pool, e),
+			       e->oblock, e->hit_count);
+			if (r)
+				return r;
+		}
+
+	return 0;
+}
+
 static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
 			    void *context)
 {
 	struct mq_policy *mq = to_mq_policy(p);
 	int r = 0;
-	struct entry *e;
-	unsigned level;
 
 	mutex_lock(&mq->lock);
 
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_for_each_entry(e, &mq->cache_clean.qs[level], list) {
-			r = fn(context, e->cblock, e->oblock, e->hit_count);
-			if (r)
-				goto out;
-		}
-
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_for_each_entry(e, &mq->cache_dirty.qs[level], list) {
-			r = fn(context, e->cblock, e->oblock, e->hit_count);
-			if (r)
-				goto out;
-		}
+	r = mq_save_hints(mq, &mq->cache_clean, fn, context);
+	if (!r)
+		r = mq_save_hints(mq, &mq->cache_dirty, fn, context);
 
-out:
 	mutex_unlock(&mq->lock);
 
 	return r;
 }
 
-static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
 {
-	struct mq_policy *mq = to_mq_policy(p);
 	struct entry *e;
 
-	mutex_lock(&mq->lock);
-
 	e = hash_lookup(mq, oblock);
-
-	BUG_ON(!e || !e->in_cache);
+	BUG_ON(!e || !in_cache(mq, e));
 
 	del(mq, e);
-	e->in_cache = false;
-	e->dirty = false;
-	push(mq, e);
+	free_entry(&mq->cache_pool, e);
+}
+
+static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	struct mq_policy *mq = to_mq_policy(p);
 
+	mutex_lock(&mq->lock);
+	__remove_mapping(mq, oblock);
 	mutex_unlock(&mq->lock);
 }
 
@@ -1105,7 +1029,7 @@ static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
 		return -ENODATA;
 
 	*oblock = e->oblock;
-	*cblock = e->cblock;
+	*cblock = infer_cblock(&mq->cache_pool, e);
 	e->dirty = false;
 	push(mq, e);
 
@@ -1125,17 +1049,17 @@ static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
 	return r;
 }
 
-static void force_mapping(struct mq_policy *mq,
-			  dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+static void __force_mapping(struct mq_policy *mq,
+			    dm_oblock_t current_oblock, dm_oblock_t new_oblock)
 {
 	struct entry *e = hash_lookup(mq, current_oblock);
 
-	BUG_ON(!e || !e->in_cache);
-
-	del(mq, e);
-	e->oblock = new_oblock;
-	e->dirty = true;
-	push(mq, e);
+	if (e && in_cache(mq, e)) {
+		del(mq, e);
+		e->oblock = new_oblock;
+		e->dirty = true;
+		push(mq, e);
+	}
 }
 
 static void mq_force_mapping(struct dm_cache_policy *p,
@@ -1144,7 +1068,7 @@ static void mq_force_mapping(struct dm_cache_policy *p,
 	struct mq_policy *mq = to_mq_policy(p);
 
 	mutex_lock(&mq->lock);
-	force_mapping(mq, current_oblock, new_oblock);
+	__force_mapping(mq, current_oblock, new_oblock);
 	mutex_unlock(&mq->lock);
 }
 
@@ -1154,7 +1078,7 @@ static dm_cblock_t mq_residency(struct dm_cache_policy *p)
 	struct mq_policy *mq = to_mq_policy(p);
 
 	mutex_lock(&mq->lock);
-	r = to_cblock(mq->nr_cblocks_allocated);
+	r = to_cblock(mq->cache_pool.nr_allocated);
 	mutex_unlock(&mq->lock);
 
 	return r;
@@ -1227,7 +1151,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 					 sector_t origin_size,
 					 sector_t cache_block_size)
 {
-	int r;
 	struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
 
 	if (!mq)
@@ -1235,8 +1158,18 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 
 	init_policy_functions(mq);
 	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
-
 	mq->cache_size = cache_size;
+
+	if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
+		DMERR("couldn't initialize pool of pre-cache entries");
+		goto bad_pre_cache_init;
+	}
+
+	if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
+		DMERR("couldn't initialize pool of cache entries");
+		goto bad_cache_init;
+	}
+
 	mq->tick_protected = 0;
 	mq->tick = 0;
 	mq->hit_count = 0;
@@ -1244,8 +1177,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 	mq->promote_threshold = 0;
 	mutex_init(&mq->lock);
 	spin_lock_init(&mq->tick_lock);
-	mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG);
-	mq->find_free_last_word = 0;
 
 	queue_init(&mq->pre_cache);
 	queue_init(&mq->cache_clean);
@@ -1253,31 +1184,19 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 
 	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
 
-	mq->nr_entries = 2 * from_cblock(cache_size);
-	r = alloc_entries(mq, mq->nr_entries);
-	if (r)
-		goto bad_cache_alloc;
-
-	mq->nr_entries_allocated = 0;
-	mq->nr_cblocks_allocated = 0;
-
 	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
 	mq->hash_bits = ffs(mq->nr_buckets) - 1;
 	mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL);
 	if (!mq->table)
 		goto bad_alloc_table;
 
-	mq->allocation_bitset = alloc_bitset(from_cblock(cache_size));
-	if (!mq->allocation_bitset)
-		goto bad_alloc_bitset;
-
 	return &mq->policy;
 
-bad_alloc_bitset:
-	kfree(mq->table);
 bad_alloc_table:
-	free_entries(mq);
-bad_cache_alloc:
+	epool_exit(&mq->cache_pool);
+bad_cache_init:
+	epool_exit(&mq->pre_cache_pool);
+bad_pre_cache_init:
 	kfree(mq);
 
 	return NULL;
@@ -1287,7 +1206,7 @@ bad_cache_alloc:
 
 static struct dm_cache_policy_type mq_policy_type = {
 	.name = "mq",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1295,7 +1214,7 @@ static struct dm_cache_policy_type mq_policy_type = {
 
 static struct dm_cache_policy_type default_policy_type = {
 	.name = "default",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
-- 
1.7.4.4

* [PATCH 5/6] dm cache: add remove_cblock method to policy interface
  2013-11-11 17:20 [PATCH for-3.13 0/6] dm cache: simple cache block invalidation interface Mike Snitzer
                   ` (3 preceding siblings ...)
  2013-11-11 17:20 ` [PATCH 4/6] dm cache policy mq: reduce memory requirements Mike Snitzer
@ 2013-11-11 17:20 ` Mike Snitzer
  2013-11-12  0:18   ` Alasdair G Kergon
  2013-11-11 17:20 ` [PATCH 6/6] dm cache: add cache block invalidation support Mike Snitzer
  5 siblings, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 17:20 UTC (permalink / raw)
  To: dm-devel

From: Joe Thornber <ejt@redhat.com>

Implement policy_remove_cblock() and add remove_cblock method to the mq
policy.  These methods will be used by the following cache block
invalidation patch which adds the 'invalidate_cblocks' message to the
cache core.

Also, update some comments in dm-cache-policy.h

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-policy-internal.h |    5 ++++
 drivers/md/dm-cache-policy-mq.c       |   35 +++++++++++++++++++++++++++++++++
 drivers/md/dm-cache-policy.h          |   21 ++++++++++++++++---
 3 files changed, 57 insertions(+), 4 deletions(-)
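
For context, a rough sketch of how a caller is expected to use the new
method (names here are placeholders, not the cache core's real ones): walk
a cblock range and treat -ENODATA as "already gone", which is the
passthrough race described in the new header comment:

    #include <errno.h>
    #include <stdint.h>

    /*
     * Illustration only: ask the policy to drop each cblock in
     * [begin, end).  remove_cblock() stands in for policy_remove_cblock();
     * -ENODATA means the block was already unmapped (e.g. by a write in
     * passthrough mode) and is not treated as an error.
     */
    static int invalidate_range(int (*remove_cblock)(uint64_t cblock),
                                uint64_t begin, uint64_t end)
    {
            int r;

            for (; begin != end; begin++) {
                    r = remove_cblock(begin);
                    if (r && r != -ENODATA)
                            return r;       /* real failure */
            }

            return 0;
    }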

diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index a75f7e7..2256a1f 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -64,6 +64,11 @@ static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t
 	p->remove_mapping(p, oblock);
 }
 
+static inline int policy_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
+{
+	return p->remove_cblock(p, cblock);
+}
+
 static inline void policy_force_mapping(struct dm_cache_policy *p,
 					dm_oblock_t current_oblock, dm_oblock_t new_oblock)
 {
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 782bf85..7209fab 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -304,6 +304,15 @@ static void free_entry(struct entry_pool *ep, struct entry *e)
 	list_add(&e->list, &ep->free);
 }
 
+/*
+ * Returns NULL if the entry is free.
+ */
+static struct entry *epool_find(struct entry_pool *ep, dm_cblock_t cblock)
+{
+	struct entry *e = ep->entries + from_cblock(cblock);
+	return e->hlist.pprev ? e : NULL;
+}
+
 static bool epool_empty(struct entry_pool *ep)
 {
 	return list_empty(&ep->free);
@@ -1020,6 +1029,31 @@ static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
 	mutex_unlock(&mq->lock);
 }
 
+static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock)
+{
+	struct entry *e = epool_find(&mq->cache_pool, cblock);
+
+	if (!e)
+		return -ENODATA;
+
+	del(mq, e);
+	free_entry(&mq->cache_pool, e);
+
+	return 0;
+}
+
+static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
+{
+	int r;
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	r = __remove_cblock(mq, cblock);
+	mutex_unlock(&mq->lock);
+
+	return r;
+}
+
 static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
 			      dm_cblock_t *cblock)
 {
@@ -1139,6 +1173,7 @@ static void init_policy_functions(struct mq_policy *mq)
 	mq->policy.load_mapping = mq_load_mapping;
 	mq->policy.walk_mappings = mq_walk_mappings;
 	mq->policy.remove_mapping = mq_remove_mapping;
+	mq->policy.remove_cblock = mq_remove_cblock;
 	mq->policy.writeback_work = mq_writeback_work;
 	mq->policy.force_mapping = mq_force_mapping;
 	mq->policy.residency = mq_residency;
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
index 33369ca..052c00a 100644
--- a/drivers/md/dm-cache-policy.h
+++ b/drivers/md/dm-cache-policy.h
@@ -135,9 +135,6 @@ struct dm_cache_policy {
 	 */
 	int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);
 
-	/*
-	 * oblock must be a mapped block.  Must not block.
-	 */
 	void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
 	void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
 
@@ -159,8 +156,24 @@ struct dm_cache_policy {
 	void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock,
 			      dm_oblock_t new_oblock);
 
-	int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock);
+	/*
+	 * This is called via the invalidate_cblocks message.  It is
+	 * possible the particular cblock has already been removed due to a
+	 * write io in passthrough mode, in which case this method should
+	 * return -ENODATA.
+	 */
+	int (*remove_cblock)(struct dm_cache_policy *p, dm_cblock_t cblock);
 
+	/*
+	 * Provide a dirty block to be written back by the core target.
+	 *
+	 * Returns:
+	 *
+	 * 0 and @cblock,@oblock: block to write back provided
+	 *
+	 * -ENODATA: no dirty blocks available
+	 */
+	int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock);
 
 	/*
 	 * How full is the cache?
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 6/6] dm cache: add cache block invalidation support
  2013-11-11 17:20 [PATCH for-3.13 0/6] dm cache: simple cache block invalidation interface Mike Snitzer
                   ` (4 preceding siblings ...)
  2013-11-11 17:20 ` [PATCH 5/6] dm cache: add remove_cblock method to policy interface Mike Snitzer
@ 2013-11-11 17:20 ` Mike Snitzer
  2013-11-12  0:46   ` Alasdair G Kergon
  5 siblings, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 17:20 UTC (permalink / raw)
  To: dm-devel

From: Joe Thornber <ejt@redhat.com>

Cache block invalidation is removing an entry from the cache without
writing it back.  Cache blocks can be invalidated via the
'invalidate_cblocks' message, which takes an arbitrary number of cblock
ranges:
   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*

E.g.
   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 Documentation/device-mapper/cache.txt |   12 ++-
 drivers/md/dm-cache-target.c          |  225 ++++++++++++++++++++++++++++++++-
 2 files changed, 233 insertions(+), 4 deletions(-)

diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
index ff6639f..fc9d2df 100644
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -244,12 +244,22 @@ The message format is:
 E.g.
    dmsetup message my_cache 0 sequential_threshold 1024
 
+
+Invalidation is removing an entry from the cache without writing it
+back.  Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges.
+
+   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.
+   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
 Examples
 ========
 
 The test suite can be found here:
 
-https://github.com/jthornber/thinp-test-suite
+https://github.com/jthornber/device-mapper-test-suite
 
 dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
 	/dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 8c02177..41e664b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -150,6 +150,25 @@ struct cache_stats {
 	atomic_t discard_count;
 };
 
+/*
+ * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
+ * the one-past-the-end value.
+ */
+struct cblock_range {
+	dm_cblock_t begin;
+	dm_cblock_t end;
+};
+
+struct invalidation_request {
+	struct list_head list;
+	struct cblock_range *cblocks;
+
+	atomic_t complete;
+	int err;
+
+	wait_queue_head_t result_wait;
+};
+
 struct cache {
 	struct dm_target *ti;
 	struct dm_target_callbacks callbacks;
@@ -241,6 +260,7 @@ struct cache {
 
 	bool need_tick_bio:1;
 	bool sized:1;
+	bool invalidate:1;
 	bool commit_requested:1;
 	bool loaded_mappings:1;
 	bool loaded_discards:1;
@@ -251,6 +271,12 @@ struct cache {
 	struct cache_features features;
 
 	struct cache_stats stats;
+
+	/*
+	 * Invalidation fields.
+	 */
+	spinlock_t invalidation_lock;
+	struct list_head invalidation_requests;
 };
 
 struct per_bio_data {
@@ -283,6 +309,7 @@ struct dm_cache_migration {
 	bool demote:1;
 	bool promote:1;
 	bool requeue_holder:1;
+	bool invalidate:1;
 
 	struct dm_bio_prison_cell *old_ocell;
 	struct dm_bio_prison_cell *new_ocell;
@@ -904,8 +931,11 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
 			list_add_tail(&mg->list, &cache->quiesced_migrations);
 			spin_unlock_irqrestore(&cache->lock, flags);
 
-		} else
+		} else {
+			if (mg->invalidate)
+				policy_remove_mapping(cache->policy, mg->old_oblock);
 			cleanup_migration(mg);
+		}
 
 	} else {
 		if (mg->requeue_holder)
@@ -1115,6 +1145,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
 	mg->demote = false;
 	mg->promote = true;
 	mg->requeue_holder = true;
+	mg->invalidate = false;
 	mg->cache = cache;
 	mg->new_oblock = oblock;
 	mg->cblock = cblock;
@@ -1137,6 +1168,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
 	mg->demote = false;
 	mg->promote = false;
 	mg->requeue_holder = true;
+	mg->invalidate = false;
 	mg->cache = cache;
 	mg->old_oblock = oblock;
 	mg->cblock = cblock;
@@ -1161,6 +1193,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
 	mg->demote = true;
 	mg->promote = true;
 	mg->requeue_holder = true;
+	mg->invalidate = false;
 	mg->cache = cache;
 	mg->old_oblock = old_oblock;
 	mg->new_oblock = new_oblock;
@@ -1188,6 +1221,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
 	mg->demote = true;
 	mg->promote = false;
 	mg->requeue_holder = true;
+	mg->invalidate = true;
 	mg->cache = cache;
 	mg->old_oblock = oblock;
 	mg->cblock = cblock;
@@ -1525,6 +1559,58 @@ static void writeback_some_dirty_blocks(struct cache *cache)
 }
 
 /*----------------------------------------------------------------
+ * Invalidations.
+ * Dropping something from the cache *without* writing back.
+ *--------------------------------------------------------------*/
+
+static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
+{
+	int r = 0;
+	uint64_t begin = from_cblock(req->cblocks->begin);
+	uint64_t end = from_cblock(req->cblocks->end);
+
+	while (begin != end) {
+		r = policy_remove_cblock(cache->policy, to_cblock(begin));
+		if (!r) {
+			r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
+			if (r)
+				break;
+
+		} else if (r == -ENODATA) {
+			/* harmless, already unmapped */
+			r = 0;
+
+		} else {
+			DMERR("policy_remove_cblock failed");
+			break;
+		}
+
+		begin++;
+        }
+
+	cache->commit_requested = true;
+
+	req->err = r;
+	atomic_set(&req->complete, 1);
+
+	wake_up(&req->result_wait);
+}
+
+static void process_invalidation_requests(struct cache *cache)
+{
+	struct list_head list;
+	struct invalidation_request *req, *tmp;
+
+	INIT_LIST_HEAD(&list);
+	spin_lock(&cache->invalidation_lock);
+	list_splice_init(&cache->invalidation_requests, &list);
+	spin_unlock(&cache->invalidation_lock);
+
+	list_for_each_entry_safe (req, tmp, &list, list)
+		process_invalidation_request(cache, req);
+}
+
+/*----------------------------------------------------------------
  * Main worker loop
  *--------------------------------------------------------------*/
 static bool is_quiescing(struct cache *cache)
@@ -1593,7 +1679,8 @@ static int more_work(struct cache *cache)
 			!bio_list_empty(&cache->deferred_writethrough_bios) ||
 			!list_empty(&cache->quiesced_migrations) ||
 			!list_empty(&cache->completed_migrations) ||
-			!list_empty(&cache->need_commit_migrations);
+			!list_empty(&cache->need_commit_migrations) ||
+			cache->invalidate;
 }
 
 static void do_worker(struct work_struct *ws)
@@ -1605,6 +1692,7 @@ static void do_worker(struct work_struct *ws)
 			writeback_some_dirty_blocks(cache);
 			process_deferred_writethrough_bios(cache);
 			process_deferred_bios(cache);
+			process_invalidation_requests(cache);
 		}
 
 		process_migrations(cache, &cache->quiesced_migrations, issue_copy);
@@ -2271,6 +2359,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	cache->need_tick_bio = true;
 	cache->sized = false;
+	cache->invalidate = false;
 	cache->commit_requested = false;
 	cache->loaded_mappings = false;
 	cache->loaded_discards = false;
@@ -2284,6 +2373,9 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	atomic_set(&cache->stats.commit_count, 0);
 	atomic_set(&cache->stats.discard_count, 0);
 
+	spin_lock_init(&cache->invalidation_lock);
+	INIT_LIST_HEAD(&cache->invalidation_requests);
+
 	*result = cache;
 	return 0;
 
@@ -2833,7 +2925,128 @@ err:
 }
 
 /*
- * Supports <key> <value>.
+ * A cache block range can take two forms:
+ *
+ * i) A single cblock, eg. '3456'
+ * ii) A begin and end cblock with a dash between, eg. 123-234
+ */
+static int parse_cblock_range(struct cache *cache, const char *str,
+			      struct cblock_range *result)
+{
+	char dummy;
+	uint64_t b, e;
+	int r;
+
+	/*
+	 * Try and parse form (ii) first.
+	 */
+	r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
+	if (r < 0)
+		return r;
+
+	if (r == 2) {
+		result->begin = to_cblock(b);
+		result->end = to_cblock(e);
+		return 0;
+	}
+
+	/*
+	 * That didn't work, try form (i).
+	 */
+	r = sscanf(str, "%llu%c", &b, &dummy);
+	if (r < 0)
+		return r;
+
+	if (r == 1) {
+		result->begin = to_cblock(b);
+		result->end = to_cblock(from_cblock(result->begin) + 1u);
+		return 0;
+	}
+
+	DMERR("invalid cblock range '%s'", str);
+	return -EINVAL;
+}
+
+static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
+{
+	uint64_t b = from_cblock(range->begin);
+	uint64_t e = from_cblock(range->end);
+	uint64_t n = from_cblock(cache->cache_size);
+
+	if (b >= n) {
+		DMERR("begin cblock out of range: %llu >= %llu", b, n);
+		return -EINVAL;
+	}
+
+	if (e > n) {
+		DMERR("end cblock out of range: %llu > %llu", e, n);
+		return -EINVAL;
+	}
+
+	if (b >= e) {
+		DMERR("invalid cblock range: %llu >= %llu", b, e);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int request_invalidation(struct cache *cache, struct cblock_range *range)
+{
+	struct invalidation_request req;
+
+	INIT_LIST_HEAD(&req.list);
+	req.cblocks = range;
+	atomic_set(&req.complete, 0);
+	req.err = 0;
+	init_waitqueue_head(&req.result_wait);
+
+	spin_lock(&cache->invalidation_lock);
+	list_add(&req.list, &cache->invalidation_requests);
+	spin_unlock(&cache->invalidation_lock);
+	wake_worker(cache);
+
+	wait_event(req.result_wait, atomic_read(&req.complete));
+	return req.err;
+}
+
+static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
+					      const char **cblock_ranges)
+{
+	int r = 0;
+	unsigned i;
+	struct cblock_range range;
+
+	if (!passthrough_mode(&cache->features)) {
+		DMERR("cache has to be in passthrough mode for invalidation");
+		return -EPERM;
+	}
+
+	for (i = 0; i < count; i++) {
+		r = parse_cblock_range(cache, cblock_ranges[i], &range);
+		if (r)
+			break;
+
+		r = validate_cblock_range(cache, &range);
+		if (r)
+			break;
+
+		/*
+		 * Pass begin and end cache blocks to the worker and wake it.
+		 */
+		r = request_invalidation(cache, &range);
+		if (r)
+			break;
+	}
+
+	return r;
+}
+
+/*
+ * Supports
+ *	"<key> <value>"
+ * and
+ *     "invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
  *
  * The key migration_threshold is supported by the cache target core.
  */
@@ -2841,6 +3054,12 @@ static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
 {
 	struct cache *cache = ti->private;
 
+	if (!argc)
+		return -EINVAL;
+
+	if (!strcmp(argv[0], "invalidate_cblocks"))
+		return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
+
 	if (argc != 2)
 		return -EINVAL;
 
-- 
1.7.4.4
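
For anyone wanting to try the message end to end, a minimal sequence
might look like the following.  This is only a sketch: the device names
and table geometry follow the example in cache.txt and are illustrative,
and the cache must be (re)loaded with the passthrough feature (which
itself expects a clean cache), since invalidate_cblocks is rejected in
any other mode:

   # reload the existing cache with passthrough enabled (1 feature arg)
   dmsetup reload my_cache --table '0 41943040 cache /dev/mapper/metadata \
	/dev/mapper/ssd /dev/mapper/origin 512 1 passthrough default 0'
   dmsetup suspend my_cache
   dmsetup resume my_cache

   # drop the requested cblocks from the cache without writing them back
   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789

Note that, per the one-past-the-end comment on struct cblock_range and
the 'while (begin != end)' loop in process_invalidation_request above,
the end of a range appears to be exclusive, so '3456-4567' covers
cblocks 3456 through 4566.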


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 17:20 ` [PATCH 1/6] dm cache: cache shrinking support Mike Snitzer
@ 2013-11-11 21:19   ` Alasdair G Kergon
  2013-11-11 21:32     ` Mike Snitzer
  0 siblings, 1 reply; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-11 21:19 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel

On Mon, Nov 11, 2013 at 12:20:43PM -0500, Mike Snitzer wrote:
> From: Joe Thornber <ejt@redhat.com>
> 
> Allow a cache to shrink if the blocks being removed from the cache are
> not dirty.
 
Please would you document the intended procedure here?

> +			DMERR("unable to shrink cache due to dirty blocks");

This error is highly undesirable: part of a device has been removed
while it is still needed!

How does someone go about reducing the size of the cache avoiding this
error?  (Analogy: to reduce the size of a filesystem you run a filesystem
resize process before reducing the size of the block device.)

Alasdair


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 21:19   ` Alasdair G Kergon
@ 2013-11-11 21:32     ` Mike Snitzer
  2013-11-11 21:48       ` Alasdair G Kergon
                         ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 21:32 UTC (permalink / raw)
  To: dm-devel

On Mon, Nov 11 2013 at  4:19pm -0500,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Mon, Nov 11, 2013 at 12:20:43PM -0500, Mike Snitzer wrote:
> > From: Joe Thornber <ejt@redhat.com>
> > 
> > Allow a cache to shrink if the blocks being removed from the cache are
> > not dirty.
>  
> Please would you document the intended procedure here?

I'm not rebasing this patch (and in turn all commits that follow) to
tweak this header.  We can add information to cache.txt as a follow-on
commit.
 
> > +			DMERR("unable to shrink cache due to dirty blocks");
> 
> This error is highly undesirable: part of a device has been removed
> while it is still needed!

Huh?  The device still had dirty blocks, so the cache wasn't resized.

> How does someone go about reducing the size of the cache avoiding this
> error?  (Analogy: to reduce size of a filesystem you run a filesystem
> resize process before reducing the size of the block device.)

Use the cleaner policy to purge the cache first.
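
Roughly, and only as a sketch (the table geometry follows the cache.txt
example and is illustrative; the point is just the policy switch and the
wait for the cache to become clean):

   # switch the cache over to the cleaner policy
   dmsetup reload my_cache --table '0 41943040 cache /dev/mapper/metadata \
	/dev/mapper/ssd /dev/mapper/origin 512 0 cleaner 0'
   dmsetup suspend my_cache
   dmsetup resume my_cache

   # poll 'dmsetup status my_cache' until the dirty block count drops
   # to 0, then shrink the fast device and reload the cache table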


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 21:32     ` Mike Snitzer
@ 2013-11-11 21:48       ` Alasdair G Kergon
  2013-11-11 21:50       ` Mike Snitzer
  2013-11-11 23:03       ` Alasdair G Kergon
  2 siblings, 0 replies; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-11 21:48 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 04:32:33PM -0500, Mike Snitzer wrote:
> On Mon, Nov 11 2013 at  4:19pm -0500,
> Alasdair G Kergon <agk@redhat.com> wrote:
  
> > > +			DMERR("unable to shrink cache due to dirty blocks");
> > This error is highly undesirable: part of a device has been removed
> > while it is still needed!
> Huh?  The device still had dirty blocks, so the cache wasn't resized.

The device no longer has those dirty blocks: you already removed them 
(otherwise you wouldn't have reached this point).
 
> Use the cleaner policy to purge the cache first.
 
But how do you do that without also cleaning the part of the cache that
you're keeping?  (For example, why should it be necessary to stop using
your preferred cache policy for a while and clean the entire cache just in
order to reduce the size of the cache slightly?)

If the current situation is temporary then can we indicate what a better
solution is likely to look like, and whether it is likely still to use this
proposed shrinker interface or whether that interface might be replaced with
something else?

Alasdair


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 21:32     ` Mike Snitzer
  2013-11-11 21:48       ` Alasdair G Kergon
@ 2013-11-11 21:50       ` Mike Snitzer
  2013-11-11 22:09         ` Alasdair G Kergon
  2013-11-11 23:03       ` Alasdair G Kergon
  2 siblings, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 21:50 UTC (permalink / raw)
  To: dm-devel

On Mon, Nov 11 2013 at  4:32pm -0500,
Mike Snitzer <snitzer@redhat.com> wrote:

> On Mon, Nov 11 2013 at  4:19pm -0500,
> Alasdair G Kergon <agk@redhat.com> wrote:
> 
> > On Mon, Nov 11, 2013 at 12:20:43PM -0500, Mike Snitzer wrote:
> > > From: Joe Thornber <ejt@redhat.com>
> > > 
> > > Allow a cache to shrink if the blocks being removed from the cache are
> > > not dirty.
> >  
> > Please would you document the intended procedure here?
> 
> I'm not rebasing this patch (and in turn all commits that follow) to
> tweak this header.  We can add information to cache.txt as a follow-on
> commit.
>  
> > > +			DMERR("unable to shrink cache due to dirty blocks");
> > 
> > This error is highly undesirable: part of a device has been removed
> > while it is still needed!
> 
> Huh?  The device still had dirty blocks, so the cache wasn't resized.

Maybe you were saying: the trigger to resize is the size of the fast
device having been reduced, so getting that error _after_ the user has
already resized the fast device isn't really useful.

Valid point... but given DM gives users a tremendous amount of room to
hang themselves, this is par for the course, no?  Making this more
bullet-proof could have userspace detect that the device is being used
as part of a dm-cache device... and then run userspace utils to analyze
whether the device still has dirty blocks.

The kernel is merely acting as it was told, and yes, the user could do
the wrong thing.  So more documentation is certainly warranted.
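
For example (nothing more than a sketch): 'dmsetup table' shows which
devices are cache targets, 'dmsetup deps' shows which underlying devices
each of them sits on, and 'dmsetup status' reports the cache's dirty
block count (the field order is documented in cache.txt), so a resize
tool could refuse to shrink a fast device while that count is non-zero:

   dmsetup table | grep ' cache '    # find active cache targets
   dmsetup deps my_cache             # confirm the device about to shrink is used
   dmsetup status my_cache           # check the dirty block count first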

Mike


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 21:50       ` Mike Snitzer
@ 2013-11-11 22:09         ` Alasdair G Kergon
  2013-11-11 22:15           ` Alasdair G Kergon
  0 siblings, 1 reply; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-11 22:09 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 04:50:09PM -0500, Mike Snitzer wrote:
> Valid point... but given DM gives users a tremendous amount of room to
> hang themselves, this is par for the course, no?  Making this more
> bullet-proof could have userspace detect that the device is being used
> as part of a dm-cache device... and then run userspace utils to analyze
> whether the device still has dirty blocks.
 
Indeed - I am asking what the intended process for userspace to follow
is, as I can't see any particularly efficient one at the moment (for a
general case with many dirty blocks) without first making further kernel
changes - and some ways to do this better might involve using a
different kernel interface instead of the one included in this patch.

In other words, I'm asking for some details of the future direction of
the cache shrinking feature so we can see that the interface in this
patch is genuinely useful to userspace and will be built upon as the
userspace shrinking features are developed.  Otherwise I think it could
be premature to include it if it turns out userspace needs to do things
a different way with a different interface.

Alasdair


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 22:09         ` Alasdair G Kergon
@ 2013-11-11 22:15           ` Alasdair G Kergon
  2013-11-11 23:06             ` Mike Snitzer
  0 siblings, 1 reply; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-11 22:15 UTC (permalink / raw)
  To: device-mapper development

Could you make the case that this interface is only intended for use
when using the cache in a mode that doesn't have dirty blocks?

And that the mechanism for shrinking a cache with dirty blocks has not
yet been developed but might use a quite different interface?

Alasdair


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 21:32     ` Mike Snitzer
  2013-11-11 21:48       ` Alasdair G Kergon
  2013-11-11 21:50       ` Mike Snitzer
@ 2013-11-11 23:03       ` Alasdair G Kergon
  2013-11-11 23:08         ` Mike Snitzer
  2 siblings, 1 reply; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-11 23:03 UTC (permalink / raw)
  To: device-mapper development

> DMERR("unable to shrink cache due to dirty blocks");

Also, what is the process to recover from this error, given that you have
already lost the end of the device and you cannot now resume the device
without a preresume failure?

Do the tools support retrieval of the dirty blocks from the portion of the
device that you do still have?

Alasdair


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 22:15           ` Alasdair G Kergon
@ 2013-11-11 23:06             ` Mike Snitzer
  2013-11-11 23:17               ` Alasdair G Kergon
  0 siblings, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 23:06 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 5:15 PM, Alasdair G Kergon <agk@redhat.com> wrote:
> Could you make the case that this interface is only intended for use
> when using the cache in a mode that doesn't have dirty blocks?

This really isn't even an "interface".  All this patch enables a user
to do is shrink the cache device if they have taken care not to have
dirty blocks (be it via the "cleaner" policy or the passthrough or
writethrough operation modes).

> And that the mechanism for shrinking a cache with dirty blocks has not
> yet been developed but might use a quite different interface?

We can add an interface to instruct the cache to clean all dirty
blocks >= a particular threshold [1], thereby enabling even dirty/hot
caches to remain generally useful while still allowing the fast device
to be shrunk.

But no, I'm not prepared to predict what that'll look like, and no, that
doesn't invalidate the basic support that this patch is providing.

We'll improve the docs but there is no reason to withhold this patch
waiting for more add-ons.  By the time lvm2 has support for dm-cache
this very likely won't be a concern anyway (e.g. we'll have the
ability to do [1] above).


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 23:03       ` Alasdair G Kergon
@ 2013-11-11 23:08         ` Mike Snitzer
  0 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 23:08 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 6:03 PM, Alasdair G Kergon <agk@redhat.com> wrote:
>> DMERR("unable to shrink cache due to dirty blocks");
>
> Also, what is the process to recover from this error, given that you have
> already lost the end of the device and you cannot now resume the device
> without a preresume failure?
>
> Do the tools support retrieval of the dirty blocks from the portion of the
> device that you do still have?

Whatever the user used to resize the fast device would need to be
reversed.  The metadata hasn't changed, and provided the user can
actually get the original data back (possible with a linear volume,
anyway) then they can easily recover.  More sophisticated volumes may
not make this possible...

It hurts when one shoots oneself in the foot.
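
If the fast device is, say, a linear LV that was shrunk with lvreduce,
recovery might be as simple as extending it back to its original size
and reloading the cache table.  A sketch only, and only valid if the
extents that were removed come back from the same physical location:

   lvextend -l +<number of extents previously removed> vg/fast
   dmsetup reload my_cache --table '<the original cache table>'
   dmsetup suspend my_cache
   dmsetup resume my_cache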


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 23:06             ` Mike Snitzer
@ 2013-11-11 23:17               ` Alasdair G Kergon
  2013-11-11 23:34                 ` Mike Snitzer
  0 siblings, 1 reply; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-11 23:17 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 06:06:26PM -0500, Mike Snitzer wrote:
> This really isn't even an "interface".  

It is an extension to the existing userspace-kernel interface: a size
reduction of an underlying device is detected on reload and triggers the
removal of metadata relating to the removed portion of the device.

Another interface might for example require a message to be sent to set
a threshold above which the device is 'cleaned' until there is no
metadata pointing beyond that point.  A subsequent reload using the
smaller underlying device would not then need to trigger any metadata
removal: it would already have been removed.

Alasdair


* Re: [PATCH 1/6] dm cache: cache shrinking support
  2013-11-11 23:17               ` Alasdair G Kergon
@ 2013-11-11 23:34                 ` Mike Snitzer
  0 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2013-11-11 23:34 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 6:17 PM, Alasdair G Kergon <agk@redhat.com> wrote:
> On Mon, Nov 11, 2013 at 06:06:26PM -0500, Mike Snitzer wrote:
>> This really isn't even an "interface".
>
> It is an extension to the existing userspace-kernel interface: a size
> reduction of an underlying device is detected on reload and triggers the
> removal of metadata relating to the removed portion of the device.
>
> Another interface might for example require a message to be sent to set
> a threshold above which the device is 'cleaned' until there is no
> metadata pointing beyond that point.  A subsequent reload using the
> smaller underlying device would not then need to trigger any metadata
> removal: it would already have been removed.

There is no reason to make them mutually exclusive, or to _only_ have
the latter behavior you suggested, where the metadata is automatically
resized once it is clean below the specified threshold.

Even without this patch the user is already screwed if they resize
away a dirty portion of the fast device while it is actively in use by
dm-cache.

Relative to your concerns: all this patch changes is that the shrink is
detected while blocks are still dirty and the user is told they have
shot themselves in the foot.  So if anything this patch offers an
improvement.
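
In other words the failing sequence looks roughly like this (a sketch;
LV and device names are illustrative):

   lvreduce -L -10G vg/fast     # shrink the fast device under a dirty cache
   dmsetup suspend my_cache
   dmsetup resume my_cache      # preresume fails and the kernel logs
                                # "unable to shrink cache due to dirty blocks"

and the recovery is to restore the original fast device (or clean the
cache first) before resuming.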


* Re: [PATCH 2/6] dm cache: add passthrough mode
  2013-11-11 17:20 ` [PATCH 2/6] dm cache: add passthrough mode Mike Snitzer
@ 2013-11-11 23:40   ` Alasdair G Kergon
  0 siblings, 0 replies; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-11 23:40 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 12:20:44PM -0500, Mike Snitzer wrote:
> From: Joe Thornber <ejt@redhat.com>

> Later, applications can perform coherency checks, the nature of which
> will depend on the type of the underlying storage.  

Any examples of how this might be done?
Might one of the later patches in this series be needed, for example,
to invalidate parts of the device found to be out-of-date before
using the cache normally again?

Alasdair


* Re: [PATCH 5/6] dm cache: add remove_cblock method to policy interface
  2013-11-11 17:20 ` [PATCH 5/6] dm cache: add remove_cblock method to policy interface Mike Snitzer
@ 2013-11-12  0:18   ` Alasdair G Kergon
  0 siblings, 0 replies; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-12  0:18 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 12:20:47PM -0500, Mike Snitzer wrote:
> From: Joe Thornber <ejt@redhat.com>
 
> +static struct entry *epool_find(struct entry_pool *ep, dm_cblock_t cblock)
> +{
> +	struct entry *e = ep->entries + from_cblock(cblock);
> +	return e->hlist.pprev ? e : NULL;

Please use hlist_unhashed() rather than accessing pprev explicitly where possible.

static inline int hlist_unhashed(const struct hlist_node *h)
{
        return !h->pprev;
}

> +++ b/drivers/md/dm-cache-policy.h
> @@ -135,9 +135,6 @@ struct dm_cache_policy {
>  	 */
>  	int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);
>  
> -	/*
> -	 * oblock must be a mapped block.  Must not block.
> -	 */
>  	void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
>  	void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
  
So a policy can no longer assume oblock is a mapped block, and these functions
may block?

Is this consistent with 
          BUG_ON(!e || !in_cache(mq, e));
in __mq_set_clear_dirty?

Alasdair


* Re: [PATCH 6/6] dm cache: add cache block invalidation support
  2013-11-11 17:20 ` [PATCH 6/6] dm cache: add cache block invalidation support Mike Snitzer
@ 2013-11-12  0:46   ` Alasdair G Kergon
  2013-11-12 17:35     ` Mike Snitzer
  0 siblings, 1 reply; 23+ messages in thread
From: Alasdair G Kergon @ 2013-11-12  0:46 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11, 2013 at 12:20:48PM -0500, Mike Snitzer wrote:
> From: Joe Thornber <ejt@redhat.com>
 
> Cache block invalidation is removing an entry from the cache without
> writing it back.  Cache blocks can be invalidated via the
> 'invalidate_cblocks' message, which takes an arbitrary number of cblock
> ranges:
>    invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
 
If we're expecting to need to invalidate a large number of ranges, should we
switch this over to hexadecimal like we did for dm-switch?

Should we consider changing this to <cblock>+<length> as the dm
interfaces tend to work with a start offset and a length?

Or will it be that the userspace interface will not be the bottleneck in
this code so it really doesn't matter about attempting to optimise that
part?

> +++ b/Documentation/device-mapper/cache.txt

>  The test suite can be found here:
> -https://github.com/jthornber/thinp-test-suite
> +https://github.com/jthornber/device-mapper-test-suite

It's really not good if URLs in existing documentation become invalid
like this: Could an obvious pointer be placed at the old location to
direct people to the new location?

Can we document whether or not all the ranges will have been invalidated
before the message returns?

> +	if (!passthrough_mode(&cache->features)) {
> +		DMERR("cache has to be in passthrough mode for invalidation");

Can we document this too?

> @@ -2841,6 +3054,12 @@ static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
> +	if (!strcmp(argv[0], "invalidate_cblocks"))

strcasecmp please!

Alasdair


* Re: [PATCH 6/6] dm cache: add cache block invalidation support
  2013-11-12  0:46   ` Alasdair G Kergon
@ 2013-11-12 17:35     ` Mike Snitzer
  2013-11-12 19:26       ` Mike Snitzer
  0 siblings, 1 reply; 23+ messages in thread
From: Mike Snitzer @ 2013-11-12 17:35 UTC (permalink / raw)
  To: device-mapper development

On Mon, Nov 11 2013 at  7:46pm -0500,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Mon, Nov 11, 2013 at 12:20:48PM -0500, Mike Snitzer wrote:
> > From: Joe Thornber <ejt@redhat.com>
>  
> > Cache block invalidation is removing an entry from the cache without
> > writing it back.  Cache blocks can be invalidated via the
> > 'invalidate_cblocks' message, which takes an arbitrary number of cblock
> > ranges:
> >    invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
>  
> If we're expecting to need to invalidate a large number of ranges, should we
> switch this over to hexadecimal like we did for dm-switch?
> 
> Should we consider changing this to <cblock>+<length> as the dm
> interfaces tend to work with a start offset and a length?
> 
> Or will it be that the userspace interface will not be the bottleneck in
> this code so it really doesn't matter about attempting to optimise that
> part?

TBD.  The plan is to introduce an "invalidate_cblocks_hex" variant if needed.

> > +++ b/Documentation/device-mapper/cache.txt
> 
> >  The test suite can be found here:
> > -https://github.com/jthornber/thinp-test-suite
> > +https://github.com/jthornber/device-mapper-test-suite
> 
> It's really not good if URLs in existing documentation become invalid
> like this: Could an obvious pointer be placed at the old location to
> direct people to the new location?
> 
> Can we document whether or not all the ranges will have been invalidated
> before the message returns?
> 
> > +	if (!passthrough_mode(&cache->features)) {
> > +		DMERR("cache has to be in passthrough mode for invalidation");
> 
> Can we document this too?
> 
> > @@ -2841,6 +3054,12 @@ static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
> > +	if (!strcmp(argv[0], "invalidate_cblocks"))
> 
> strcasecmp please!

Pushed the following commit to address your comments on this patchset;
I can rebase this commit if you have any further suggestions:
http://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-next&id=e67df7acf867b654c6eca7d08ee0452e9c5e4534


* Re: [PATCH 6/6] dm cache: add cache block invalidation support
  2013-11-12 17:35     ` Mike Snitzer
@ 2013-11-12 19:26       ` Mike Snitzer
  0 siblings, 0 replies; 23+ messages in thread
From: Mike Snitzer @ 2013-11-12 19:26 UTC (permalink / raw)
  To: device-mapper development

On Tue, Nov 12 2013 at 12:35pm -0500,
Mike Snitzer <snitzer@redhat.com> wrote:
 
> Pushed the following commit to address your comments on this patchset;
> I can rebase this commit if you have any further suggestions:
> http://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-next&id=e67df7acf867b654c6eca7d08ee0452e9c5e4534

Rebased with a minor cache.txt revision here:
http://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-next&id=7b6b2bc98c0303b7f043ad5b35906f833e56308d


