All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2
@ 2012-04-28  4:44 Mike Snitzer
  2012-04-28  4:44 ` [PATCH 2/2] dm thin: support for non power of 2 pool blocksize Mike Snitzer
  2012-04-30 16:10 ` [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2 Alasdair G Kergon
  0 siblings, 2 replies; 13+ messages in thread
From: Mike Snitzer @ 2012-04-28  4:44 UTC (permalink / raw)
  To: dm-devel; +Cc: ejt, agk

Required to support a target's use of a non power of 2 blocksize.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e24143c..fe57285 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -973,10 +973,10 @@ static sector_t max_io_len(sector_t sector, struct dm_target *ti)
 	 * Does the target need to split even further ?
 	 */
 	if (ti->split_io) {
-		sector_t boundary;
 		sector_t offset = dm_target_offset(ti, sector);
-		boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
-			   - offset;
+		sector_t boundary, tmp = offset + ti->split_io;
+
+		boundary = ti->split_io - do_div(tmp, ti->split_io);
 		if (len > boundary)
 			len = boundary;
 	}
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/2] dm thin: support for non power of 2 pool blocksize
  2012-04-28  4:44 [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2 Mike Snitzer
@ 2012-04-28  4:44 ` Mike Snitzer
  2012-04-28  4:51   ` [PATCH] thinp-test-suite: " Mike Snitzer
  2012-04-30  9:55   ` [PATCH 2/2] dm thin: " Joe Thornber
  2012-04-30 16:10 ` [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2 Alasdair G Kergon
  1 sibling, 2 replies; 13+ messages in thread
From: Mike Snitzer @ 2012-04-28  4:44 UTC (permalink / raw)
  To: dm-devel; +Cc: ejt, agk

Non power of 2 blocksize support is needed to properly align thinp IO
on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).

Use do_div wrappers to support non power of 2 blocksize for the pool's
data device.  do_div provides comparable performance to the power of 2
math that was performed until now (as tested on modern x86_64 hardware).

Verify that the pool's blocksize is a multiple of 64K and that the
pool's data device is a multiple of blocksize.

Eliminate pool structure's 'sectors_per_block', 'block_shift' and
remaining 4 byte holes.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-thin.c |   56 +++++++++++++++++++++++++++++++++----------------
 1 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ce7dd80..91644b4 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -507,10 +507,8 @@ struct pool {
 	struct block_device *md_dev;
 	struct dm_pool_metadata *pmd;
 
-	uint32_t sectors_per_block;
-	unsigned block_shift;
-	dm_block_t offset_mask;
 	dm_block_t low_water_blocks;
+	uint32_t sectors_per_block;
 
 	struct pool_features pf;
 	unsigned low_water_triggered:1;	/* A dm event has been sent */
@@ -523,8 +521,8 @@ struct pool {
 	struct work_struct worker;
 	struct delayed_work waker;
 
-	unsigned ref_count;
 	unsigned long last_commit_jiffies;
+	unsigned ref_count;
 
 	spinlock_t lock;
 	struct bio_list deferred_bios;
@@ -673,9 +671,27 @@ static void requeue_io(struct thin_c *tc)
  * target.
  */
 
+/*
+ * do_div wrappers that don't modify the dividend
+ */
+static inline sector_t dm_thin_do_div(sector_t a, __u32 b)
+{
+	sector_t r = a;
+
+	do_div(r, b);
+	return r;
+}
+
+static inline sector_t dm_thin_do_mod(sector_t a, __u32 b)
+{
+	sector_t tmp = a;
+
+	return do_div(tmp, b);
+}
+
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
-	return bio->bi_sector >> tc->pool->block_shift;
+	return dm_thin_do_div(bio->bi_sector, tc->pool->sectors_per_block);
 }
 
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
@@ -683,8 +699,8 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 	struct pool *pool = tc->pool;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
-	bio->bi_sector = (block << pool->block_shift) +
-		(bio->bi_sector & pool->offset_mask);
+	bio->bi_sector = (block * pool->sectors_per_block) +
+		dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block);
 }
 
 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -929,9 +945,8 @@ static void process_prepared(struct pool *pool, struct list_head *head,
  */
 static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return !(bio->bi_sector & pool->offset_mask) &&
+	return !dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block) &&
 		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
-
 }
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1234,8 +1249,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 			 * part of the discard that is in a subsequent
 			 * block.
 			 */
-			sector_t offset = bio->bi_sector - (block << pool->block_shift);
-			unsigned remaining = (pool->sectors_per_block - offset) << 9;
+			sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+			unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
 			bio->bi_size = min(bio->bi_size, remaining);
 
 			cell_release_singleton(cell, bio);
@@ -1696,8 +1711,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	pool->pmd = pmd;
 	pool->sectors_per_block = block_size;
-	pool->block_shift = ffs(block_size) - 1;
-	pool->offset_mask = block_size - 1;
 	pool->low_water_blocks = 0;
 	pool_features_init(&pool->pf);
 	pool->prison = prison_create(PRISON_CELLS);
@@ -1941,12 +1954,18 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
-	    !is_power_of_2(block_size)) {
+	    dm_thin_do_mod(block_size, DATA_DEV_BLOCK_SIZE_MIN_SECTORS)) {
 		ti->error = "Invalid block size";
 		r = -EINVAL;
 		goto out;
 	}
 
+	if (dm_thin_do_mod(ti->len, block_size)) {
+		ti->error = "Data device is not a multiple of block size";
+		r = -EINVAL;
+		goto out;
+	}
+
 	if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
 		ti->error = "Invalid low water mark";
 		r = -EINVAL;
@@ -2089,7 +2108,7 @@ static int pool_preresume(struct dm_target *ti)
 	if (r)
 		return r;
 
-	data_size = ti->len >> pool->block_shift;
+	data_size = dm_thin_do_div(ti->len, pool->sectors_per_block);
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
 		DMERR("failed to retrieve data device size");
@@ -2709,17 +2728,18 @@ static int thin_iterate_devices(struct dm_target *ti,
 {
 	dm_block_t blocks;
 	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
 
 	/*
 	 * We can't call dm_pool_get_data_dev_size() since that blocks.  So
 	 * we follow a more convoluted path through to the pool's target.
 	 */
-	if (!tc->pool->ti)
+	if (!pool->ti)
 		return 0;	/* nothing is bound */
 
-	blocks = tc->pool->ti->len >> tc->pool->block_shift;
+	blocks = dm_thin_do_div(pool->ti->len, pool->sectors_per_block);
 	if (blocks)
-		return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
 
 	return 0;
 }
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH] thinp-test-suite: support for non power of 2 pool blocksize
  2012-04-28  4:44 ` [PATCH 2/2] dm thin: support for non power of 2 pool blocksize Mike Snitzer
@ 2012-04-28  4:51   ` Mike Snitzer
  2012-04-28 15:32     ` Mike Snitzer
  2012-04-30 10:15     ` [PATCH] " Joe Thornber
  2012-04-30  9:55   ` [PATCH 2/2] dm thin: " Joe Thornber
  1 sibling, 2 replies; 13+ messages in thread
From: Mike Snitzer @ 2012-04-28  4:51 UTC (permalink / raw)
  To: dm-devel; +Cc: ejt, agk

On Sat, Apr 28 2012 at 12:44am -0400,
Mike Snitzer <snitzer@redhat.com> wrote:

> Non power of 2 blocksize support is needed to properly align thinp IO
> on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).
> 
> Use do_div wrappers to support non power of 2 blocksize for the pool's
> data device.  do_div provides comparable performance to the power of 2
> math that was performed until now (as tested on modern x86_64 hardware).
> 
> Verify that the pool's blocksize is a multiple of 64K and that the
> pool's data device is a multiple of blocksize.

The non power of 2 support patch required quite a few thinp-test-suite
fixes; this works for me but there may be more clever/clean ways to
address rounding down the pool size to a multiple of blocksize...

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 creation_tests.rb      |   26 ++++++++++++++++++++------
 lib/thinp-test.rb      |    2 ++
 multiple_pool_tests.rb |    2 ++
 pool_resize_tests.rb   |    4 +++-
 4 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/creation_tests.rb b/creation_tests.rb
index 7c298cc..3d804e1 100644
--- a/creation_tests.rb
+++ b/creation_tests.rb
@@ -41,8 +41,9 @@ class CreationTests < ThinpTestCase
   end
 
   def test_huge_block_size
-    size = @size
     data_block_size = 524288
+    size = @size / data_block_size
+    size *= data_block_size
     volume_size = 524288
     lwm = 5
     table = Table.new(ThinPool.new(size, @metadata_dev, @data_dev,
@@ -54,12 +55,22 @@ class CreationTests < ThinpTestCase
 
   tag :thinp_target, :quick
 
-  def test_non_power_of_2_data_block_size_fails
-    table = Table.new(ThinPool.new(@size, @metadata_dev, @data_dev,
-                                   @data_block_size + 57, @low_water_mark))
+  def test_data_dev_not_multiple_of_block_size_fails
+    size = @size - 128 + 1
+    table = Table.new(ThinPool.new(size, @metadata_dev, @data_dev,
+                                   128, @low_water_mark))
     assert_bad_table(table)
   end
 
+  def test_non_power_of_2_data_block_size_succeeds
+    data_block_size = 384
+    size = @size / data_block_size
+    size *= data_block_size
+    table = Table.new(ThinPool.new(size, @metadata_dev, @data_dev,
+                                   data_block_size, @low_water_mark))
+    @dm.with_dev(table) {|pool| {}}
+  end
+
   def test_too_small_data_block_size_fails
     table = Table.new(ThinPool.new(@size, @metadata_dev, @data_dev,
                                    64, @low_water_mark))
@@ -73,8 +84,11 @@ class CreationTests < ThinpTestCase
   end
 
   def test_largest_data_block_size_succeeds
-    table = Table.new(ThinPool.new(@size, @metadata_dev, @data_dev,
-                                   2**21, @low_water_mark))
+    data_block_size = 2**21
+    size = @size / data_block_size
+    size *= data_block_size
+    table = Table.new(ThinPool.new(size, @metadata_dev, @data_dev,
+                                   data_block_size, @low_water_mark))
     @dm.with_dev(table) {|pool| {}}
   end
 
diff --git a/lib/thinp-test.rb b/lib/thinp-test.rb
index 7152c2e..3cbf254 100644
--- a/lib/thinp-test.rb
+++ b/lib/thinp-test.rb
@@ -32,6 +32,8 @@ class ThinpTestCase < Test::Unit::TestCase
 
     @volume_size = config[:volume_size]
     @volume_size = 2097152 if @volume_size.nil?
+    @volume_size /= @data_block_size
+    @volume_size *= @data_block_size
 
     @tiny_size = @data_block_size
 
diff --git a/multiple_pool_tests.rb b/multiple_pool_tests.rb
index 75afa49..93b1cdd 100644
--- a/multiple_pool_tests.rb
+++ b/multiple_pool_tests.rb
@@ -85,6 +85,8 @@ class MultiplePoolTests < ThinpTestCase
     md_size = limit_metadata_dev_size(tvm.free_space / 16)
     tvm.add_volume(linear_vol('md', md_size))
     data_size = limit_data_dev_size(tvm.free_space)
+    data_size /= @block_size
+    data_size *= @block_size
     tvm.add_volume(linear_vol('data', data_size))
 
     with_devs(tvm.table('md'),
diff --git a/pool_resize_tests.rb b/pool_resize_tests.rb
index 3b17b44..6c8eadf 100644
--- a/pool_resize_tests.rb
+++ b/pool_resize_tests.rb
@@ -17,6 +17,8 @@ class PoolResizeTests < ThinpTestCase
     super
     @low_water_mark = 0
     @data_block_size = 128
+    @size /= @data_block_size
+    @size *= @data_block_size
   end
 
   tag :thinp_target
@@ -143,7 +145,7 @@ class PoolResizeTests < ThinpTestCase
   def test_ext4_runs_out_of_space
     # we create a pool with a really tiny data volume that wont be
     # able to complete a mkfs.
-    with_standard_pool(16) do |pool|
+    with_standard_pool(128) do |pool|
       with_new_thin(pool, @volume_size, 0) do |thin|
 
         event_tracker = pool.event_tracker;

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: thinp-test-suite: support for non power of 2 pool blocksize
  2012-04-28  4:51   ` [PATCH] thinp-test-suite: " Mike Snitzer
@ 2012-04-28 15:32     ` Mike Snitzer
  2012-04-30 10:15     ` [PATCH] " Joe Thornber
  1 sibling, 0 replies; 13+ messages in thread
From: Mike Snitzer @ 2012-04-28 15:32 UTC (permalink / raw)
  To: dm-devel; +Cc: ejt, agk

On Sat, Apr 28 2012 at 12:51am -0400,
Mike Snitzer <snitzer@redhat.com> wrote:

> On Sat, Apr 28 2012 at 12:44am -0400,
> Mike Snitzer <snitzer@redhat.com> wrote:
> 
> > Non power of 2 blocksize support is needed to properly align thinp IO
> > on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).
> > 
> > Use do_div wrappers to support non power of 2 blocksize for the pool's
> > data device.  do_div provides comparable performance to the power of 2
> > math that was performed until now (as tested on modern x86_64 hardware).
> > 
> > Verify that the pool's blocksize is a multiple of 64K and that the
> > pool's data device is a multiple of blocksize.
> 
> The non power of 2 support patch required quite a few thinp-test-suite
> fixes; this works for me but there may be more clever/clean ways to
> address rounding down the pool size to a multiple of blocksize...

Need this too:

diff --git a/pool_resize_tests.rb b/pool_resize_tests.rb
index 6c8eadf..61e046f 100644
--- a/pool_resize_tests.rb
+++ b/pool_resize_tests.rb
@@ -50,6 +50,8 @@ class PoolResizeTests < ThinpTestCase
 
   def test_resize_no_io
     target_step = @size / 8
+    target_step /= @data_block_size
+    target_step *= @data_block_size
     with_standard_pool(target_step) do |pool|
       2.upto(8) do |n|
         table = Table.new(ThinPool.new(n * target_step, @metadata_dev, @data_dev,

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] dm thin: support for non power of 2 pool blocksize
  2012-04-28  4:44 ` [PATCH 2/2] dm thin: support for non power of 2 pool blocksize Mike Snitzer
  2012-04-28  4:51   ` [PATCH] thinp-test-suite: " Mike Snitzer
@ 2012-04-30  9:55   ` Joe Thornber
  2012-04-30 17:33     ` Mike Snitzer
  1 sibling, 1 reply; 13+ messages in thread
From: Joe Thornber @ 2012-04-30  9:55 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, ejt, agk

Hi Mike,

In general this looks good.  A lot cleaner now you've dropped the
specialisation of the division.  A few nit-picks below.

- Joe

On Sat, Apr 28, 2012 at 12:44:29AM -0400, Mike Snitzer wrote:
> +/*
> + * do_div wrappers that don't modify the dividend
> + */
> +static inline sector_t dm_thin_do_div(sector_t a, __u32 b)
> +{
> +	sector_t r = a;
> +
> +	do_div(r, b);
> +	return r;
> +}
> +
> +static inline sector_t dm_thin_do_mod(sector_t a, __u32 b)
> +{
> +	sector_t tmp = a;
> +
> +	return do_div(tmp, b);
> +}

Please don't inline static functions.  Let the compiler make the
decision.

Also those sector_t's are passed by value, so you don't need to
declare r or tmp.  eg, it's enough to do this:

static sector_t dm_thin_do_div(sector_t a, __u32 b)
{
	do_div(a, b);
	return a;
}

static sector_t dm_thin_do_mod(sector_t a, __u32 b)
{
	return do_div(a, b);
}

> @@ -1941,12 +1954,18 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)

...

> +	if (dm_thin_do_mod(ti->len, block_size)) {
> +		ti->error = "Data device is not a multiple of block size";
> +		r = -EINVAL;
> +		goto out;
> +	}

I don't see the need for this check.  If I have a disk that isn't a
multiple of the block size why should I have to layer a linear mapping
on it to truncate it before I can use it as a data volume?  Any
partial block at the end of the device is already ignored (see the
data_size calculation in pool_preresume).  Is this restriction causing
some of the changes you made to the test-suite?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] thinp-test-suite: support for non power of 2 pool blocksize
  2012-04-28  4:51   ` [PATCH] thinp-test-suite: " Mike Snitzer
  2012-04-28 15:32     ` Mike Snitzer
@ 2012-04-30 10:15     ` Joe Thornber
  1 sibling, 0 replies; 13+ messages in thread
From: Joe Thornber @ 2012-04-30 10:15 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, ejt, agk

On Sat, Apr 28, 2012 at 12:51:51AM -0400, Mike Snitzer wrote:
> On Sat, Apr 28 2012 at 12:44am -0400,
> Mike Snitzer <snitzer@redhat.com> wrote:
> The non power of 2 support patch required quite a few thinp-test-suite
> fixes; this works for me but there may be more clever/clean ways to
> address rounding down the pool size to a multiple of blocksize...

Yes, I need to do some work here.  We can't keep tweaking these magic
numbers.

- Some tests hardwire a block size, eg, my cache_tests, your
  non-power-2 tests.

- Some tests can prove their point with a small pool size.  So no point
  wasting time by running on bigger pools.

- Some tests would definitely benefit by scaling up to larger pools,
  more thins etc, as hardware allows.  But we should probably control
  this from the command line - big tests take a long time to run, so
  we shouldn't assume we should scale up the tests to consume all disk
  space.

- different hardware has different logical block sizes, which can
  effect io sizes (eg, see discard tests).  The code needs to make it
  clear that the sizes are being influenced by the LBS.

- We should make it clear in the results, and the test source, which
  tests scaled up and how.  eg, did data_block_size change?,
  data_dev_size?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2
  2012-04-28  4:44 [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2 Mike Snitzer
  2012-04-28  4:44 ` [PATCH 2/2] dm thin: support for non power of 2 pool blocksize Mike Snitzer
@ 2012-04-30 16:10 ` Alasdair G Kergon
  2012-04-30 17:24   ` Mike Snitzer
  1 sibling, 1 reply; 13+ messages in thread
From: Alasdair G Kergon @ 2012-04-30 16:10 UTC (permalink / raw)
  To: Mike Snitzer, Jonathan Brassow; +Cc: dm-devel, ejt, agk

On Sat, Apr 28, 2012 at 12:44:28AM -0400, Mike Snitzer wrote:
> Required to support a target's use of a non power of 2 blocksize.
 
For which targets?
(merge_bvec supported?)

> +		boundary = ti->split_io - do_div(tmp, ti->split_io);

sector_div()?

What about 32-bit arch + LBD + large split_io (from raid?)
  - Is a 32-bit restriction on split_io unreasonable nowadays?
    - OR reasonable on 32bit/LBD?
    - OR fallback to old code there?

Alasdair

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2
  2012-04-30 16:10 ` [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2 Alasdair G Kergon
@ 2012-04-30 17:24   ` Mike Snitzer
  2012-04-30 18:36     ` Alasdair G Kergon
  0 siblings, 1 reply; 13+ messages in thread
From: Mike Snitzer @ 2012-04-30 17:24 UTC (permalink / raw)
  To: Jonathan Brassow, dm-devel, ejt, agk

On Mon, Apr 30 2012 at 12:10pm -0400,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Sat, Apr 28, 2012 at 12:44:28AM -0400, Mike Snitzer wrote:
> > Required to support a target's use of a non power of 2 blocksize.
>  
> For which targets?

striped and thin-pool for starters.

> (merge_bvec supported?)

Yes.

> > +		boundary = ti->split_io - do_div(tmp, ti->split_io);
> 
> sector_div()?
>
> What about 32-bit arch + LBD + large split_io (from raid?)
>   - Is a 32-bit restriction on split_io unreasonable nowadays?
>     - OR reasonable on 32bit/LBD?
>     - OR fallback to old code there?

I cannot see why we'd need a split_io that is larger than 32 bits -- a
32bit split_io can support up to 2TB (2**32 * 512b sectors).  Even
on a LBD (raid) the stripe size (split_io) will not be so large.

(though yes we would need to establish a check in DM core that split_io is
limited to 32-bit -- even though the 'sector_t' is used for split_io;
and the comment inside the 'struct dm_target' would need updating).

But what I think what you're driving at is: is there a benefit/reason to
maintain the old code for some target that won't ever use non power of 2
split_io (e.g. dm-raid at the moment)?  I see no point for the duality
in the code but I'm open to the idea if you have a specific reason in
mind -- are you concerned about perf on more obscure/older hardware?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] dm thin: support for non power of 2 pool blocksize
  2012-04-30  9:55   ` [PATCH 2/2] dm thin: " Joe Thornber
@ 2012-04-30 17:33     ` Mike Snitzer
  2012-05-01  9:41       ` Joe Thornber
  0 siblings, 1 reply; 13+ messages in thread
From: Mike Snitzer @ 2012-04-30 17:33 UTC (permalink / raw)
  To: dm-devel, ejt, agk

On Mon, Apr 30 2012 at  5:55am -0400,
Joe Thornber <thornber@redhat.com> wrote:

> Hi Mike,
> 
> In general this looks good.  A lot cleaner now you've dropped the
> specialisation of the division.  A few nit-picks below.
> 
> - Joe
> 
> On Sat, Apr 28, 2012 at 12:44:29AM -0400, Mike Snitzer wrote:
> > +/*
> > + * do_div wrappers that don't modify the dividend
> > + */
> > +static inline sector_t dm_thin_do_div(sector_t a, __u32 b)
> > +{
> > +	sector_t r = a;
> > +
> > +	do_div(r, b);
> > +	return r;
> > +}
> > +
> > +static inline sector_t dm_thin_do_mod(sector_t a, __u32 b)
> > +{
> > +	sector_t tmp = a;
> > +
> > +	return do_div(tmp, b);
> > +}
> 
> Please don't inline static functions.  Let the compiler make the
> decision.

But in this instance we certainly want it inlined; regardless of whether
the compiler might do it anyway.   Why would we ever want to allow the
compiler to not inline it?

> Also those sector_t's are passed by value, so you don't need to
> declare r or tmp.  eg, it's enough to do this:
> 
> static sector_t dm_thin_do_div(sector_t a, __u32 b)
> {
> 	do_div(a, b);
> 	return a;
> }
> 
> static sector_t dm_thin_do_mod(sector_t a, __u32 b)
> {
> 	return do_div(a, b);
> }

OK, makes sense.

> > @@ -1941,12 +1954,18 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
> 
> ...
> 
> > +	if (dm_thin_do_mod(ti->len, block_size)) {
> > +		ti->error = "Data device is not a multiple of block size";
> > +		r = -EINVAL;
> > +		goto out;
> > +	}
> 
> I don't see the need for this check.  If I have a disk that isn't a
> multiple of the block size why should I have to layer a linear mapping
> on it to truncate it before I can use it as a data volume?  Any
> partial block at the end of the device is already ignored (see the
> data_size calculation in pool_preresume).  Is this restriction causing
> some of the changes you made to the test-suite?

Yes.  But imposing this restriction was motivated by and the "attempt to
access beyond end of device" failures I saw with test_origin_unchanged
(I shared more details about this in private mail to you and kabi on
4/25 -- making the pool size a multiple of the non power of 2 blocksize
resolved that failure).

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2
  2012-04-30 17:24   ` Mike Snitzer
@ 2012-04-30 18:36     ` Alasdair G Kergon
  2012-04-30 18:59       ` Mike Snitzer
  0 siblings, 1 reply; 13+ messages in thread
From: Alasdair G Kergon @ 2012-04-30 18:36 UTC (permalink / raw)
  To: Mike Snitzer, Jonathan Brassow; +Cc: dm-devel, ejt, agk

On Mon, Apr 30, 2012 at 01:24:00PM -0400, Mike Snitzer wrote:
> On Mon, Apr 30 2012 at 12:10pm -0400,
> Alasdair G Kergon <agk@redhat.com> wrote:
> > On Sat, Apr 28, 2012 at 12:44:28AM -0400, Mike Snitzer wrote:
> > > Required to support a target's use of a non power of 2 blocksize.
> > For which targets?
> striped and thin-pool for starters.
> > (merge_bvec supported?)
> Yes.
 
But there's overlap between merge_bvec and split_io.
  - Why does stripe_merge() have:

        if (!q->merge_bvec_fn)
                return max_size;

    when it's already done the division?

    - Couldn't that be changed to avoid split_io causing a split?
        (Except, as ever, across a table reload, which prevents us
	 removing it completely.)

> I cannot see why we'd need a split_io that is larger than 32 bits -- a
> 32bit split_io can support up to 2TB (2**32 * 512b sectors).  Even
> on a LBD (raid) the stripe size (split_io) will not be so large.
 
But is that enforced in the raid code or not?

> But what I think what you're driving at is: 

(I'm not convinced the proposed patch has been tested on 32-bit+LBD,
attempting to divide by a 64-bit number etc.)

> is there a benefit/reason to
> maintain the old code for some target that won't ever use non power of 2
> split_io (e.g. dm-raid at the moment)?  I see no point for the duality
> in the code but I'm open to the idea if you have a specific reason in
> mind -- are you concerned about perf on more obscure/older hardware?

EITHER the 32 bit split_io *must* be enforced (after we've convinced
ourselves 64 bits will never be required);
OR we keep it 64-bit and add some compat code.

Alasdair

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2
  2012-04-30 18:36     ` Alasdair G Kergon
@ 2012-04-30 18:59       ` Mike Snitzer
  2012-05-01 15:42         ` Brassow Jonathan
  0 siblings, 1 reply; 13+ messages in thread
From: Mike Snitzer @ 2012-04-30 18:59 UTC (permalink / raw)
  To: Jonathan Brassow, dm-devel, ejt, agk

On Mon, Apr 30 2012 at  2:36pm -0400,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Mon, Apr 30, 2012 at 01:24:00PM -0400, Mike Snitzer wrote:
> > On Mon, Apr 30 2012 at 12:10pm -0400,
> > Alasdair G Kergon <agk@redhat.com> wrote:
> > > On Sat, Apr 28, 2012 at 12:44:28AM -0400, Mike Snitzer wrote:
> > > > Required to support a target's use of a non power of 2 blocksize.
> > > For which targets?
> > striped and thin-pool for starters.
> > > (merge_bvec supported?)
> > Yes.
>  
> But there's overlap between merge_bvec and split_io.
>   - Why does stripe_merge() have:
> 
>         if (!q->merge_bvec_fn)
>                 return max_size;
> 
>     when it's already done the division?
> 
>     - Couldn't that be changed to avoid split_io causing a split?
>         (Except, as ever, across a table reload, which prevents us
> 	 removing it completely.)
> 
> > I cannot see why we'd need a split_io that is larger than 32 bits -- a
> > 32bit split_io can support up to 2TB (2**32 * 512b sectors).  Even
> > on a LBD (raid) the stripe size (split_io) will not be so large.
>  
> But is that enforced in the raid code or not?

No idea, need Jon to weigh in here.  I'm hopeful we can impose 32bit
within dm-raid and coordinate with Neil on getting the appropriate MD
code (chunk_sectors) to reflect the reality that 32 bit is adequate.

> > But what I think what you're driving at is: 
> 
> (I'm not convinced the proposed patch has been tested on 32-bit+LBD,
> attempting to divide by a 64-bit number etc.)

Right, it wasn't tested on 32bit.  It'll fail to build due to split_io
being sector_t.
 
> > is there a benefit/reason to
> > maintain the old code for some target that won't ever use non power of 2
> > split_io (e.g. dm-raid at the moment)?  I see no point for the duality
> > in the code but I'm open to the idea if you have a specific reason in
> > mind -- are you concerned about perf on more obscure/older hardware?
> 
> EITHER the 32 bit split_io *must* be enforced (after we've convinced
> ourselves 64 bits will never be required);
> OR we keep it 64-bit and add some compat code.

Yeap, I'm hopeful we can go with the former.

Jon, what do you think?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] dm thin: support for non power of 2 pool blocksize
  2012-04-30 17:33     ` Mike Snitzer
@ 2012-05-01  9:41       ` Joe Thornber
  0 siblings, 0 replies; 13+ messages in thread
From: Joe Thornber @ 2012-05-01  9:41 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, ejt, agk

On Mon, Apr 30, 2012 at 01:33:47PM -0400, Mike Snitzer wrote:
> Yes.  But imposing this restriction was motivated by and the "attempt to
> access beyond end of device" failures I saw with test_origin_unchanged
> (I shared more details about this in private mail to you and kabi on
> 4/25 -- making the pool size a multiple of the non power of 2 blocksize
> resolved that failure).

This is the wrong fix; you're just hiding the real issue.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2
  2012-04-30 18:59       ` Mike Snitzer
@ 2012-05-01 15:42         ` Brassow Jonathan
  0 siblings, 0 replies; 13+ messages in thread
From: Brassow Jonathan @ 2012-05-01 15:42 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, ejt, agk


[-- Attachment #1.1: Type: text/plain, Size: 2475 bytes --]


On Apr 30, 2012, at 1:59 PM, Mike Snitzer wrote:

>>> 
>>> I cannot see why we'd need a split_io that is larger than 32 bits -- a
>>> 32bit split_io can support up to 2TB (2**32 * 512b sectors).  Even
>>> on a LBD (raid) the stripe size (split_io) will not be so large.
>> 
>> But is that enforced in the raid code or not?
> 
> No idea, need Jon to weigh in here.  I'm hopeful we can impose 32bit
> within dm-raid and coordinate with Neil on getting the appropriate MD
> code (chunk_sectors) to reflect the reality that 32 bit is adequate.
> 
>>> But what I think what you're driving at is: 
>> 
>> (I'm not convinced the proposed patch has been tested on 32-bit+LBD,
>> attempting to divide by a 64-bit number etc.)
> 
> Right, it wasn't tested on 32bit.  It'll fail to build due to split_io
> being sector_t.
> 
>>> is there a benefit/reason to
>>> maintain the old code for some target that won't ever use non power of 2
>>> split_io (e.g. dm-raid at the moment)?  I see no point for the duality
>>> in the code but I'm open to the idea if you have a specific reason in
>>> mind -- are you concerned about perf on more obscure/older hardware?
>> 
>> EITHER the 32 bit split_io *must* be enforced (after we've convinced
>> ourselves 64 bits will never be required);
>> OR we keep it 64-bit and add some compat code.
> 
> Yeap, I'm hopeful we can go with the former.
> 
> Jon, what do you think?

split_io is used by dm-raid to split I/O on 'region_size' and 'chunk_size' boundaries.  The former is used for bitmap purposes for the write intent log (RAID 1/4/5/6).  The later is used by striping targets (RAID 4/5/6).  There are some sanity checks.  They must  be power of 2, region_size must be larger than 'chunk_size', etc.  However, I don't see any enforcement on the size of those numbers.  There probably should be, given that 'chunk_sectors'/'stripe_sectors' are 32-bit 'int' types.  Additionally, 'chunksize' (the name given for 'region_size' in MD) is a 32-bit type on disk and 'unsigned long' in memory - with no checking for size when the in-memory version is stored on disk.

I think these checks should probably be added, but there would be a limitation on the size of arrays imposed, I think.  Neil mentioned that the maximum size of a bitmap was (1 << 21) bits.  If the max 'region_size' is (1 << 32), then the max size device is (1 << 21) * (1 << 32) * (1 << 9) = 4 EiB.  I don't think that's a problem.

 brassow


[-- Attachment #1.2: Type: text/html, Size: 4481 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-05-01 15:42 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-28  4:44 [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2 Mike Snitzer
2012-04-28  4:44 ` [PATCH 2/2] dm thin: support for non power of 2 pool blocksize Mike Snitzer
2012-04-28  4:51   ` [PATCH] thinp-test-suite: " Mike Snitzer
2012-04-28 15:32     ` Mike Snitzer
2012-04-30 10:15     ` [PATCH] " Joe Thornber
2012-04-30  9:55   ` [PATCH 2/2] dm thin: " Joe Thornber
2012-04-30 17:33     ` Mike Snitzer
2012-05-01  9:41       ` Joe Thornber
2012-04-30 16:10 ` [PATCH 1/2] dm: update max_io_len to support a split_io that is not a power of 2 Alasdair G Kergon
2012-04-30 17:24   ` Mike Snitzer
2012-04-30 18:36     ` Alasdair G Kergon
2012-04-30 18:59       ` Mike Snitzer
2012-05-01 15:42         ` Brassow Jonathan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.