* [PATCH v2] dm: add topology support
@ 2009-06-09  5:14 Mike Snitzer
  2009-06-10  6:51 ` Martin K. Petersen
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Snitzer @ 2009-06-09  5:14 UTC (permalink / raw)
  To: dm-devel; +Cc: Martin K. Petersen

Use blk_stack_limits() to stack block limits (including topology) rather
than duplicate the equivalent within Device Mapper.

This patch depends on commit 77634f33d4078542cf1087995cced0ffdae25aa2
from linux-2.6-block +for-next

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-table.c         |  145 +++++++++++-------------------------------
 include/linux/device-mapper.h |   16 ----
 2 files changed, 43 insertions(+), 118 deletions(-)

Index: linux-2.6/drivers/md/dm-table.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-table.c
+++ linux-2.6/drivers/md/dm-table.c
@@ -66,7 +66,7 @@ struct dm_table {
 	 * These are optimistic limits taken from all the
 	 * targets, some targets will need smaller limits.
 	 */
-	struct io_restrictions limits;
+	struct queue_limits limits;
 
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
@@ -89,43 +89,6 @@ static unsigned int int_log(unsigned int
 }
 
 /*
- * Returns the minimum that is _not_ zero, unless both are zero.
- */
-#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
-
-/*
- * Combine two io_restrictions, always taking the lower value.
- */
-static void combine_restrictions_low(struct io_restrictions *lhs,
-				     struct io_restrictions *rhs)
-{
-	lhs->max_sectors =
-		min_not_zero(lhs->max_sectors, rhs->max_sectors);
-
-	lhs->max_phys_segments =
-		min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
-
-	lhs->max_hw_segments =
-		min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
-
-	lhs->logical_block_size = max(lhs->logical_block_size,
-				      rhs->logical_block_size);
-
-	lhs->max_segment_size =
-		min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
-
-	lhs->max_hw_sectors =
-		min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors);
-
-	lhs->seg_boundary_mask =
-		min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
-
-	lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
-
-	lhs->no_cluster |= rhs->no_cluster;
-}
-
-/*
  * Calculate the index of the child node of the n'th node k'th key.
  */
 static inline unsigned int get_child(unsigned int n, unsigned int k)
@@ -513,10 +476,14 @@ static int __table_get_device(struct dm_
 	return 0;
 }
 
+/*
+ * Returns the minimum that is _not_ zero, unless both are zero.
+ */
+#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
+
 void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	struct io_restrictions *rs = &ti->limits;
 	char b[BDEVNAME_SIZE];
 
 	if (unlikely(!q)) {
@@ -525,15 +492,9 @@ void dm_set_device_limits(struct dm_targ
 		return;
 	}
 
-	/*
-	 * Combine the device limits low.
-	 *
-	 * FIXME: if we move an io_restriction struct
-	 *        into q this would just be a call to
-	 *        combine_restrictions_low()
-	 */
-	rs->max_sectors =
-		min_not_zero(rs->max_sectors, queue_max_sectors(q));
+	if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
+		DMWARN("%s: target device %s is misaligned",
+		       dm_device_name(ti->table->md), bdevname(bdev, b));
 
 	/*
 	 * Check if merge fn is supported.
@@ -542,33 +503,9 @@ void dm_set_device_limits(struct dm_targ
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		rs->max_sectors =
-			min_not_zero(rs->max_sectors,
+		ti->limits.max_sectors =
+			min_not_zero(ti->limits.max_sectors,
 				     (unsigned int) (PAGE_SIZE >> 9));
-
-	rs->max_phys_segments =
-		min_not_zero(rs->max_phys_segments,
-			     queue_max_phys_segments(q));
-
-	rs->max_hw_segments =
-		min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
-
-	rs->logical_block_size = max(rs->logical_block_size,
-				     queue_logical_block_size(q));
-
-	rs->max_segment_size =
-		min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
-
-	rs->max_hw_sectors =
-		min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
-
-	rs->seg_boundary_mask =
-		min_not_zero(rs->seg_boundary_mask,
-			     queue_segment_boundary(q));
-
-	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
-
-	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
@@ -706,24 +643,28 @@ int dm_split_args(int *argc, char ***arg
 	return 0;
 }
 
-static void check_for_valid_limits(struct io_restrictions *rs)
+static void check_for_valid_limits(struct queue_limits *limits)
 {
-	if (!rs->max_sectors)
-		rs->max_sectors = SAFE_MAX_SECTORS;
-	if (!rs->max_hw_sectors)
-		rs->max_hw_sectors = SAFE_MAX_SECTORS;
-	if (!rs->max_phys_segments)
-		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
-	if (!rs->max_hw_segments)
-		rs->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!rs->logical_block_size)
-		rs->logical_block_size = 1 << SECTOR_SHIFT;
-	if (!rs->max_segment_size)
-		rs->max_segment_size = MAX_SEGMENT_SIZE;
-	if (!rs->seg_boundary_mask)
-		rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (!rs->bounce_pfn)
-		rs->bounce_pfn = -1;
+	if (!limits->max_sectors)
+		limits->max_sectors = SAFE_MAX_SECTORS;
+	if (!limits->max_hw_sectors)
+		limits->max_hw_sectors = SAFE_MAX_SECTORS;
+	if (!limits->max_phys_segments)
+		limits->max_phys_segments = MAX_PHYS_SEGMENTS;
+	if (!limits->max_hw_segments)
+		limits->max_hw_segments = MAX_HW_SEGMENTS;
+	if (!limits->logical_block_size)
+		limits->logical_block_size = 1 << SECTOR_SHIFT;
+	if (!limits->physical_block_size)
+		limits->physical_block_size = 1 << SECTOR_SHIFT;
+	if (!limits->io_min)
+		limits->io_min = 1 << SECTOR_SHIFT;
+	if (!limits->max_segment_size)
+		limits->max_segment_size = MAX_SEGMENT_SIZE;
+	if (!limits->seg_boundary_mask)
+		limits->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+	if (!limits->bounce_pfn)
+		limits->bounce_pfn = -1;
 }
 
 /*
@@ -843,9 +784,12 @@ int dm_table_add_target(struct dm_table 
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	/* FIXME: the plan is to combine high here and then have
-	 * the merge fn apply the target level restrictions. */
-	combine_restrictions_low(&t->limits, &tgt->limits);
+	if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
+		DMWARN("%s: target device (start sect %llu len %llu) "
+		       "is misaligned",
+		       dm_device_name(t->md),
+		       (unsigned long long) tgt->begin,
+		       (unsigned long long) tgt->len);
 	return 0;
 
  bad:
@@ -1010,17 +954,10 @@ no_integrity:
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 {
 	/*
-	 * Make sure we obey the optimistic sub devices
-	 * restrictions.
+	 * blk_stack_limits() returns an error if the table is misaligned, but
+	 * dm_table_add_target() already checked the table's alignment
 	 */
-	blk_queue_max_sectors(q, t->limits.max_sectors);
-	blk_queue_max_phys_segments(q, t->limits.max_phys_segments);
-	blk_queue_max_hw_segments(q, t->limits.max_hw_segments);
-	blk_queue_logical_block_size(q, t->limits.logical_block_size);
-	blk_queue_max_segment_size(q, t->limits.max_segment_size);
-	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
-	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
-	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
+	(void)blk_stack_limits(&q->limits, &t->limits, 0);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
Index: linux-2.6/include/linux/device-mapper.h
===================================================================
--- linux-2.6.orig/include/linux/device-mapper.h
+++ linux-2.6/include/linux/device-mapper.h
@@ -143,18 +143,6 @@ struct target_type {
 	struct list_head list;
 };
 
-struct io_restrictions {
-	unsigned long bounce_pfn;
-	unsigned long seg_boundary_mask;
-	unsigned max_hw_sectors;
-	unsigned max_sectors;
-	unsigned max_segment_size;
-	unsigned short logical_block_size;
-	unsigned short max_hw_segments;
-	unsigned short max_phys_segments;
-	unsigned char no_cluster; /* inverted so that 0 is default */
-};
-
 struct dm_target {
 	struct dm_table *table;
 	struct target_type *type;
@@ -163,7 +151,7 @@ struct dm_target {
 	sector_t begin;
 	sector_t len;
 
-	/* FIXME: turn this into a mask, and merge with io_restrictions */
+	/* FIXME: turn this into a mask, and merge with queue_limits */
 	/* Always a power of 2 */
 	sector_t split_io;
 
@@ -171,7 +159,7 @@ struct dm_target {
 	 * These are automatically filled in by
 	 * dm_table_get_device.
 	 */
-	struct io_restrictions limits;
+	struct queue_limits limits;
 
 	/* target specific data */
 	void *private;

* Re: [PATCH v2] dm: add topology support
  2009-06-09  5:14 [PATCH v2] dm: add topology support Mike Snitzer
@ 2009-06-10  6:51 ` Martin K. Petersen
  2009-06-10 14:00   ` Mike Snitzer
  0 siblings, 1 reply; 15+ messages in thread
From: Martin K. Petersen @ 2009-06-10  6:51 UTC (permalink / raw)
  To: device-mapper development; +Cc: Martin K. Petersen

>>>>> "Mike" == Mike Snitzer <snitzer@redhat.com> writes:

Mike> Use blk_stack_limits() to stack block limits (including topology)
Mike> rather than duplicate the equivalent within Device Mapper.

Great!  So how are the userland bits coming along?

-- 
Martin K. Petersen	Oracle Linux Engineering

* Re: [PATCH v2] dm: add topology support
  2009-06-10  6:51 ` Martin K. Petersen
@ 2009-06-10 14:00   ` Mike Snitzer
  2009-06-10 14:36     ` Alasdair G Kergon
  2009-06-10 17:58     ` Martin K. Petersen
  0 siblings, 2 replies; 15+ messages in thread
From: Mike Snitzer @ 2009-06-10 14:00 UTC (permalink / raw)
  To: device-mapper development; +Cc: Martin K. Petersen, jens.axboe

On Wed, Jun 10 2009 at  2:51am -0400,
Martin K. Petersen <martin.petersen@oracle.com> wrote:

> >>>>> "Mike" == Mike Snitzer <snitzer@redhat.com> writes:
> 
> Mike> Use blk_stack_limits() to stack block limits (including topology)
> Mike> rather than duplicate the equivalent within Device Mapper.
> 
> Great!  So how are the userland bits coming along?

They should be pretty well sorted out with the patches I posted to
lvm-devel early Monday morning (code review still needed):
https://www.redhat.com/archives/lvm-devel/2009-June/msg00037.html

As for the DM changes: Alasdair reviewed v2 of the patch and found an
issue that I need to get a final answer on.

Your change to dm-table.c:dm_table_set_restrictions() in linux-2.6-block
+for-next's ae03bf639a5027d27270123f5f6e3ee6a412781d introduced calls to
blk_queue_*() setters.

My v2 of the DM topology support patch does away with those and just
uses blk_stack_limits().  In the process we go back to _not_ using the
blk_queue_*() setters (note the additional checks that those setters
have).

So the question: is _not_ using the blk_queue_*() setters perfectly
fine?  Given that DM has always _not_ used them, the quick answer is
"seems fine".

But I need to dig a bit more to understand if the additional logic in
the blk_queue_*() setters is something DM shouldn't be circumventing.
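
(For reference, the "additional logic" in those setters is mostly sanity
clamping.  A paraphrased sketch of what a 2.6.30-era setter does, from
memory rather than verbatim blk-settings.c:

	void blk_queue_max_sectors(struct request_queue *q,
				   unsigned int max_sectors)
	{
		/* never allow an I/O size smaller than a page */
		if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
			max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
			printk(KERN_INFO "%s: set to minimum %d\n",
			       __func__, max_sectors);
		}

		/* cap the soft limit at the block layer default */
		if (BLK_DEF_MAX_SECTORS > max_sectors)
			q->limits.max_hw_sectors =
				q->limits.max_sectors = max_sectors;
		else {
			q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
			q->limits.max_hw_sectors = max_sectors;
		}
	}

Assigning the stacked limits straight into q->limits skips those clamps,
which is exactly what I want a final answer on.)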

But we're almost done with these DM/LVM2 topology bits... really :)

Mike

* Re: Re: [PATCH v2] dm: add topology support
  2009-06-10 14:00   ` Mike Snitzer
@ 2009-06-10 14:36     ` Alasdair G Kergon
  2009-06-10 17:58     ` Martin K. Petersen
  1 sibling, 0 replies; 15+ messages in thread
From: Alasdair G Kergon @ 2009-06-10 14:36 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, Martin K. Petersen, jens.axboe

On Wed, Jun 10, 2009 at 10:00:29AM -0400, Mike Snitzer wrote:
> My v2 of the DM topology support patch does away with those and just
> uses blk_stack_limits().  In the process we go back to _not_ using the
> blk_queue_*() setters (note the additional checks that those setters
> have).
 
That was a secondary point, which could be cleared up by reviewing the
checks to ensure they all hold automatically and so never need checking
at that point.

The main problem was that applying the combined limits for the table
previously - and correctly - paid no regard to the existing queue limits
on the dm device.  The new code seems to combine the limits between the
live and inactive tables, which is nonsense as in dm terms they are
totally independent.

Alasdair

* Re: Re: [PATCH v2] dm: add topology support
  2009-06-10 14:00   ` Mike Snitzer
  2009-06-10 14:36     ` Alasdair G Kergon
@ 2009-06-10 17:58     ` Martin K. Petersen
  2009-06-10 21:12       ` Mike Snitzer
  1 sibling, 1 reply; 15+ messages in thread
From: Martin K. Petersen @ 2009-06-10 17:58 UTC (permalink / raw)
  To: device-mapper development; +Cc: Martin K. Petersen, jens.axboe

>>>>> "Mike" == Mike Snitzer <snitzer@redhat.com> writes:

Mike> So the question: is _not_ using the blk_queue_*() setters
Mike> perfectly fine?  Given that DM has always _not_ used them the
Mike> quick answer is "seems fine".

Mike> But I need to dig a bit more to understand if the additional logic
Mike> in the blk_queue_*() setters is something DM shouldn't be
Mike> circumventing.

The original intent was that drivers like DM and MD would seed their
limits using the blk_queue* calls before adding any component devices.
blk_stack_limits() would then scale accordingly for every new device
added.

Is there any reason in particular that this approach wouldn't work for
DM?  I.e. set defaults ahead of time instead of doing it upon table
completion using check_for_valid_limits()?
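
Roughly this pattern (a sketch; "component_q" and "offset" are
illustrative names, not existing DM variables):

	/* 1. seed conservative defaults when the queue is created */
	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
	blk_queue_logical_block_size(q, 512);

	/* 2. let each component device tighten/adjust those limits */
	blk_stack_limits(&q->limits, &component_q->limits, offset);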

-- 
Martin K. Petersen	Oracle Linux Engineering

* Re: [PATCH v2] dm: add topology support
  2009-06-10 17:58     ` Martin K. Petersen
@ 2009-06-10 21:12       ` Mike Snitzer
  2009-06-10 22:06         ` Martin K. Petersen
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Snitzer @ 2009-06-10 21:12 UTC (permalink / raw)
  To: device-mapper development; +Cc: Martin K. Petersen, jens.axboe

On Wed, Jun 10 2009 at  1:58pm -0400,
Martin K. Petersen <martin.petersen@oracle.com> wrote:

> >>>>> "Mike" == Mike Snitzer <snitzer@redhat.com> writes:
> 
> Mike> So the question: is _not_ using the blk_queue_*() setters
> Mike> perfectly fine?  Given that DM has always _not_ used them the
> Mike> quick answer is "seems fine".
> 
> Mike> But I need to dig a bit more to understand if the additional logic
> Mike> in the blk_queue_*() setters is something DM shouldn't be
> Mike> circumventing.
> 
> The original intent was that drivers like DM and MD would seed their
> limits using the blk_queue* calls before adding any component devices.
> blk_stack_limits() would then scale accordingly for every new device
> added.
> 
> Is there any reason in particular that this approach wouldn't work for
> DM?  I.e. set defaults ahead of time instead of doing it upon table
> completion using check_for_valid_limits()?

When a table is pushed to DM each target device in the table may have
different limits.  There is no one-size-fits-all default.

With DM, the underlying device that you're sending IO to is a function
of offset into the DM device.  Therefore, the associated IO limits
should really be a function of offset.

Understanding that that is a more complicated proposition, we're working
with the topology infrastructure that you've put together as best we
can.

That means we use a device's alignment_offset in userland LVM2 to push
down a data area whose start+size is aligned.  This gives us the
guarantee that each device in a given DM table is aligned.  From there
DM will use blk_stack_limits() to combine all the other topology limits
(alignment_offset is no longer an issue, though we could get some
warnings; we may look to silence them in dm-table.c).

But blk_stack_limits() leads to a situation where the combined limits
(io_min, logical_block_size) are not ideal for all offsets into the
resulting DM device (e.g. issuing larger IOs to some target devices than
would otherwise be needed).
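
(The stacking rules are basically per-field min/max.  Abridged from
blk_stack_limits(), not the complete function:

	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
	t->logical_block_size = max(t->logical_block_size,
				    b->logical_block_size);
	t->physical_block_size = max(t->physical_block_size,
				     b->physical_block_size);
	t->io_min = max(t->io_min, b->io_min);

So a single 4KB-sector target device pulls io_min up to 4KB for the
whole DM device, even for offsets that map to 512-byte devices.)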

This is not new for DM (historically DM stacked hardsect_size and other
limits in the same fashion), but it doesn't mean that we aren't keeping
in mind that limits as a function of offset would be more appropriate.

Mike

* Re: Re: [PATCH v2] dm: add topology support
  2009-06-10 21:12       ` Mike Snitzer
@ 2009-06-10 22:06         ` Martin K. Petersen
  2009-06-10 23:08           ` Alasdair G Kergon
  0 siblings, 1 reply; 15+ messages in thread
From: Martin K. Petersen @ 2009-06-10 22:06 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, jens.axboe

>>>>> "Mike" == Mike Snitzer <snitzer@redhat.com> writes:

Mike> When a table is pushed to DM each target device in the table may
Mike> have different limits.  There is no one-size-fits-all default.

But incrementally finding the one size that does fit all (DM dev as well
as targets) is the raison d'être for blk_stack_limits().

The default limits should all be set by the block layer when setting up
the request queue.  So my reason for inquiring was to figure out whether
check_for_valid_limits() actually makes any difference?


Mike> With DM, the underlying device that you're sending IO to is a
Mike> function of offset into the DM device.  Therefore, the associated
Mike> IO limits should really be a function of offset.

This was one of the discussion topics at the Storage and Filesystems
workshop a couple of months ago.  My original topology code actually
permitted a list of topologies for a block device, with all the
nightmares that entails of splicing and dicing lists in the stacking
function.  It was not pretty, and the general consensus at the workshop
was that this was way too complex.  I agreed and the code was gutted.

And even having a topology list failed to correctly describe some valid
configurations.  One pathological case is having a 512-byte
logical/physical drive in a mirror with a 512/4KB odd-aligned one.  How do you
define a topology for that?  The solution is to adjust the alignment and
scale up io_min to match the 4K drive.  I.e. to change things for the
drive that isn't the "problem".
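
Concretely, with illustrative numbers:

	leg A: logical  512 / physical  512, aligned
	leg B: logical  512 / physical 4096, data area starting off
	       a 4KB boundary

No single (logical, physical, alignment) triplet describes both legs at
every offset.  Stacking instead reports physical_block_size = 4096 and
io_min = 4096 for the mirror, so anything sized and aligned for leg B
is automatically fine for leg A as well.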


Mike> That means we use a device's alignment_offset in userland LVM2 to
Mike> push down a data area whose start+size is aligned.  This gives us
Mike> the guarantee that each device in a given DM table is aligned.

*nod*


Mike> But blk_stack_limits() leads to a situation where the combined
Mike> limits (io_min, logical_block_size) are not ideal for all offsets
Mike> into the resulting DM device (e.g. issuing larger IOs to some
Mike> target devices than would otherwise be needed).

Yup.  But not smaller.  That's the whole point.  Making sure we align to
the lowest common denominator and never incur a read-modify-write cycle
on any storage device (e.g. a 512-byte write to a drive with 4KB
physical sectors, forcing the drive to read, modify and rewrite the
surrounding 4KB).

-- 
Martin K. Petersen	Oracle Linux Engineering

* Re: Re: [PATCH v2] dm: add topology support
  2009-06-10 22:06         ` Martin K. Petersen
@ 2009-06-10 23:08           ` Alasdair G Kergon
  2009-06-11  2:21             ` Martin K. Petersen
  2009-06-12  4:17             ` [PATCH v2] dm: add topology support Mike Snitzer
  0 siblings, 2 replies; 15+ messages in thread
From: Alasdair G Kergon @ 2009-06-10 23:08 UTC (permalink / raw)
  To: Martin K. Petersen; +Cc: device-mapper development, Mike Snitzer, jens.axboe

On Wed, Jun 10, 2009 at 06:06:36PM -0400, Martin K. Petersen wrote:
> The default limits should all be set by the block layer when setting up
> the request queue.  So my reason for inquiring was to figure out whether
> check_for_valid_limits() actually makes any difference?
 
I renamed that badly-named function earlier to:
  init_valid_queue_limits()

It should also really be shared with block/.

What's going on is that LVM prepares, in advance, the new tables it
wants to use to build up a device stack, then, if everything has
worked, makes them all go live.

The validation has to happen during the first phase - backing out
the change to the device stack upon a failure is easier then as
we have not yet reached the commit point of the transaction.
The operation making the new stack live if at all possible must not
fail, because that comes within the commit logic and would make recovery
much trickier.

In dm terms, this means we have two tables - called 'live' and
'inactive'.  The first phase sets up inactive tables on all the stacked
devices involved in the transaction and that is when all the memory
needed is allocated and the validation occurs.  The second phase then
makes the inactive table live and discards the previously-live table.
The two tables are independent: the old queue limits on the dm device
are discarded and replaced by the newly-calculated ones.

Currently those limits are calculated in phase one, but we should see
about delaying this limit combination code (which should always succeed)
until phase two (which gives userspace code more freedom in its use of
the interface).
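
Schematically (simplified, not the actual function names):

	phase 1 "load":    allocate + validate the inactive table
	                   (may fail; the live table is untouched)
	phase 2 "resume":  swap inactive -> live, apply queue limits,
	                   discard the previously-live table
	                   (must not fail; we are past the commit point)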

Alasdair

* Re: Re: [PATCH v2] dm: add topology support
  2009-06-10 23:08           ` Alasdair G Kergon
@ 2009-06-11  2:21             ` Martin K. Petersen
  2009-06-11  9:43               ` Alasdair G Kergon
  2009-06-12  4:17             ` [PATCH v2] dm: add topology support Mike Snitzer
  1 sibling, 1 reply; 15+ messages in thread
From: Martin K. Petersen @ 2009-06-11  2:21 UTC (permalink / raw)
  To: Alasdair G. Kergon; +Cc: device-mapper development, Mike Snitzer, jens.axboe

>>>>> "agk" == Alasdair G Kergon <agk@redhat.com> writes:

agk> I renamed that badly-named function earlier to:
agk>   init_valid_queue_limits()

agk> It should also really be shared with block/.

How about this?

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1c4df9b..35e9828 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,6 +96,31 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
 EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
 
 /**
+ * blk_set_default_limits - reset limits to default values
+ * @limits:  the queue_limits structure to reset
+ *
+ * Description:
+ *   Returns a queue_limit struct to its default state.  Can be used by
+ *   stacking drivers like DM that stage table swaps and reuse an
+ *   existing device queue.
+ */
+void blk_set_default_limits(struct queue_limits *lim)
+{
+	lim->max_phys_segments = MAX_PHYS_SEGMENTS;
+	lim->max_hw_segments = MAX_HW_SEGMENTS;
+	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+	lim->max_segment_size = MAX_SEGMENT_SIZE;
+	lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
+	lim->bounce_pfn = BLK_BOUNCE_ANY;
+	lim->alignment_offset = 0;
+	lim->io_opt = 0;
+	lim->misaligned = 0;
+	lim->no_cluster = 0;
+}
+EXPORT_SYMBOL(blk_set_default_limits);
+
+/**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
  * @mfn: the alternate make_request function
@@ -123,18 +148,12 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	 * set defaults
 	 */
 	q->nr_requests = BLKDEV_MAX_RQ;
-	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
-	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
-	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
-	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
 
 	q->make_request_fn = mfn;
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
-	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
-	blk_queue_logical_block_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
@@ -147,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->unplug_timer.function = blk_unplug_timeout;
 	q->unplug_timer.data = (unsigned long)q;
 
+	blk_set_default_limits(&q->limits);
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5e740a1..04bbc91 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -921,6 +921,7 @@ extern void blk_queue_alignment_offset(struct request_queue *q,
 				       unsigned int alignment);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_default_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,

* Re: Re: [PATCH v2] dm: add topology support
  2009-06-11  2:21             ` Martin K. Petersen
@ 2009-06-11  9:43               ` Alasdair G Kergon
  2009-06-12  4:58                 ` block: Introduce helper to reset queue limits to default values Martin K. Petersen
  0 siblings, 1 reply; 15+ messages in thread
From: Alasdair G Kergon @ 2009-06-11  9:43 UTC (permalink / raw)
  To: Martin K. Petersen; +Cc: device-mapper development, Mike Snitzer, jens.axboe

On Wed, Jun 10, 2009 at 10:21:11PM -0400, Martin K. Petersen wrote:
>  /**
> + * blk_set_default_limits - reset limits to default values
> + * @limits:  the queue_limits structure to reset

Acked-by: Alasdair G Kergon <agk@redhat.com>

* Re: [PATCH v2] dm: add topology support
  2009-06-10 23:08           ` Alasdair G Kergon
  2009-06-11  2:21             ` Martin K. Petersen
@ 2009-06-12  4:17             ` Mike Snitzer
  2009-06-12  8:45               ` Alasdair G Kergon
  1 sibling, 1 reply; 15+ messages in thread
From: Mike Snitzer @ 2009-06-12  4:17 UTC (permalink / raw)
  To: Alasdair G Kergon
  Cc: device-mapper development, Martin K. Petersen, jens.axboe

On Wed, Jun 10 2009 at  7:08pm -0400,
Alasdair G Kergon <agk@redhat.com> wrote:

> On Wed, Jun 10, 2009 at 06:06:36PM -0400, Martin K. Petersen wrote:
> > The default limits should all be set by the block layer when setting up
> > the request queue.  So my reason for inquiring was to figure out whether
> > check_for_valid_limits() actually makes any difference?
>  
> I renamed that badly-named function earlier to:
>   init_valid_queue_limits()
> 
> It should also really be shared with block/.
> 
> What's going on is that LVM prepares the new tables it wants to
> build up a device stack in advance, then if everything has worked,
> makes them all go live.
> 
> The validation has to happen during the first phase - backing out
> the change to the device stack upon a failure is easier then as
> we have not yet reached the commit point of the transaction.
> The operation making the new stack live if at all possible must not
> fail, because that comes within the commit logic and would make recovery
> much trickier.
> 
> In dm terms, this means we have two tables - called 'live' and
> 'inactive'.  The first phase sets up inactive tables on all the stacked
> devices involved in the transaction and that is when all the memory
> needed is allocated and the validation occurs.  The second phase then
> makes the inactive table live and discards the previously-live table.
> The two tables are independent: the old queue limits on the dm device
> are discarded and replaced by the newly-calculated ones.
> 
> Currently those limits are calculated in phase one, but we should see
> about delaying this limit combination code (which should alway succeed)
> until phase two (which gives userspace code more freedom in its use of
> the interface).

Alasdair,

I've attempted to implement your suggested change of moving the
combining of limits from stage1 (dmsetup load) to stage2 (dmsetup 
resume).

- moved combining the limits for the DM table into stage2
  (dm_table_set_restrictions). 
- init_valid_queue_limits() was removed in favor of Martin's
  blk_set_default_limits()

But the following still establishes each target device's limits during
stage1 (dm_set_device_limits).  I don't see a way to avoid this given
that we only know the table's target devices (and associated bdev and
request_queue) through each target's ctr():

stage1 (dmsetup load)
---------------------
all necessary validation is at table load time
=> dm-ioctl.c:table_load 
   => dm-table.c:dm_table_create
   => dm-ioctl.c:populate_table
      -> dm-table.c:dm_table_add_target
      	 -> tgt->type->ctr()
	    -> dm-table.c:dm_get_device
	       -> dm-table.c:dm_set_device_limits
	       	  -> blk_stack_limits(ti, bdev->q->limits)
		     [NOTE: changed to: ti->limits = q->limits below]
		     [NOTE: this copy can't be delayed, need
		      access to target's bdev; only available through
		      ctr's params]
	 -> BEFORE: blk_stack_limits(table, ti->limits)
	    [NOTE: now delayed, moved to dm_table_set_restrictions]
      -> dm-table.c:dm_table_complete
      	 -> BEFORE: init_valid_queue_limits(table->limits)
	    [NOTE: now delayed, moved to dm_table_set_restrictions]
	    [NOTE: changed call to blk_set_default_limits]

stage2 (dmsetup resume)
-----------------------
swap_table: (use dm_table_set_restrictions hook)
1) inits table limits
2) iterates all target devices, stack limits
3) copies table limits to queue limits

=> dm.c:dm_swap_table
   -> dm.c:__bind
   -> dm-table.c:dm_table_set_restrictions

---
 drivers/md/dm-table.c |   60 ++++++++++++++++----------------------------------
 1 file changed, 20 insertions(+), 40 deletions(-)

Index: linux-2.6/drivers/md/dm-table.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-table.c
+++ linux-2.6/drivers/md/dm-table.c
@@ -492,9 +492,8 @@ void dm_set_device_limits(struct dm_targ
 		return;
 	}
 
-	if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
-		DMWARN("%s: target device %s is misaligned",
-		       dm_device_name(ti->table->md), bdevname(bdev, b));
+	/* Copy queue_limits from underlying device */
+	ti->limits = q->limits;
 
 	/*
 	 * Check if merge fn is supported.
@@ -643,34 +642,6 @@ int dm_split_args(int *argc, char ***arg
 	return 0;
 }
 
-static void init_valid_queue_limits(struct queue_limits *limits)
-{
-	if (!limits->max_sectors)
-		limits->max_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_hw_sectors)
-		limits->max_hw_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_phys_segments)
-		limits->max_phys_segments = MAX_PHYS_SEGMENTS;
-	if (!limits->max_hw_segments)
-		limits->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!limits->logical_block_size)
-		limits->logical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->physical_block_size)
-		limits->physical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->io_min)
-		limits->io_min = 1 << SECTOR_SHIFT;
-	if (!limits->max_segment_size)
-		limits->max_segment_size = MAX_SEGMENT_SIZE;
-	if (!limits->seg_boundary_mask)
-		limits->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (!limits->bounce_pfn)
-		limits->bounce_pfn = -1;
-	/*
-	 * The other fields (alignment_offset, io_opt, misaligned)
-	 * hold 0 from the kzalloc().
-	 */
-}
-
 /*
  * Impose necessary and sufficient conditions on a devices's table such
  * that any incoming bio which respects its logical_block_size can be
@@ -788,12 +759,6 @@ int dm_table_add_target(struct dm_table 
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
-		DMWARN("%s: target device (start sect %llu len %llu) "
-		       "is misaligned",
-		       dm_device_name(t->md),
-		       (unsigned long long) tgt->begin,
-		       (unsigned long long) tgt->len);
 	return 0;
 
  bad:
@@ -836,8 +801,6 @@ int dm_table_complete(struct dm_table *t
 	int r = 0;
 	unsigned int leaf_nodes;
 
-	init_valid_queue_limits(&t->limits);
-
 	r = validate_hardware_logical_block_alignment(t);
 	if (r)
 		return r;
@@ -958,8 +921,25 @@ no_integrity:
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 {
 	/*
-	 * Copy table's limits to the DM device's request_queue
+	 * Initialize table's queue_limits and merge each underlying
+	 * device's queue_limits with it
+	 */
+	struct dm_target *uninitialized_var(ti);
+	unsigned i = 0;
+
+	blk_set_default_limits(&t->limits);
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+		(void)blk_stack_limits(&t->limits, &ti->limits, 0);
+	}
+
+	/*
+	 * Each target device in the table has a data area that is aligned
+	 * (via LVM2) so the DM device's alignment_offset should be 0.
 	 */
+	t->limits.alignment_offset = 0;
+
+	/* Copy table's queue_limits to the DM device's request_queue */
 	q->limits = t->limits;
 
 	if (t->limits.no_cluster)

* block: Introduce helper to reset queue limits to default values
  2009-06-11  9:43               ` Alasdair G Kergon
@ 2009-06-12  4:58                 ` Martin K. Petersen
  2009-06-15 14:56                   ` Mike Snitzer
  0 siblings, 1 reply; 15+ messages in thread
From: Martin K. Petersen @ 2009-06-12  4:58 UTC (permalink / raw)
  To: jens.axboe; +Cc: device-mapper development, Mike Snitzer, Alasdair G. Kergon


DM reuses the request queue when swapping in a new device table.
Introduce blk_set_default_limits() which can be used to reset the
queue_limits prior to stacking devices.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Alasdair G Kergon <agk@redhat.com>

---

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1c4df9b..35e9828 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,6 +96,31 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
 EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
 
 /**
+ * blk_set_default_limits - reset limits to default values
+ * @limits:  the queue_limits structure to reset
+ *
+ * Description:
+ *   Returns a queue_limit struct to its default state.  Can be used by
+ *   stacking drivers like DM that stage table swaps and reuse an
+ *   existing device queue.
+ */
+void blk_set_default_limits(struct queue_limits *lim)
+{
+	lim->max_phys_segments = MAX_PHYS_SEGMENTS;
+	lim->max_hw_segments = MAX_HW_SEGMENTS;
+	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+	lim->max_segment_size = MAX_SEGMENT_SIZE;
+	lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
+	lim->bounce_pfn = BLK_BOUNCE_ANY;
+	lim->alignment_offset = 0;
+	lim->io_opt = 0;
+	lim->misaligned = 0;
+	lim->no_cluster = 0;
+}
+EXPORT_SYMBOL(blk_set_default_limits);
+
+/**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
  * @mfn: the alternate make_request function
@@ -123,18 +148,12 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	 * set defaults
 	 */
 	q->nr_requests = BLKDEV_MAX_RQ;
-	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
-	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
-	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
-	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
 
 	q->make_request_fn = mfn;
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
-	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
-	blk_queue_logical_block_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
@@ -147,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->unplug_timer.function = blk_unplug_timeout;
 	q->unplug_timer.data = (unsigned long)q;
 
+	blk_set_default_limits(&q->limits);
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5e740a1..04bbc91 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -921,6 +921,7 @@ extern void blk_queue_alignment_offset(struct request_queue *q,
 				       unsigned int alignment);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_default_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,

* Re: [PATCH v2] dm: add topology support
  2009-06-12  4:17             ` [PATCH v2] dm: add topology support Mike Snitzer
@ 2009-06-12  8:45               ` Alasdair G Kergon
  0 siblings, 0 replies; 15+ messages in thread
From: Alasdair G Kergon @ 2009-06-12  8:45 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: device-mapper development, Martin K. Petersen, jens.axboe

On Fri, Jun 12, 2009 at 12:17:57AM -0400, Mike Snitzer wrote:
> But the following still establishes each target device's limits during
> stage1 (dm_set_device_limits).  I don't see a way to avoid this given
> that we only know the table's target devices (and associated bdev and
> request_queue) through each target's ctr():
 
Also available from:
  struct list_head *dm_table_get_devices(struct dm_table *t)
(A list of struct dm_dev_internal, defined in dm.h.)

We might need to pass the offset at which the device occurs within
the table into dm_get_device() and, for now, only store it there the
first time for each device, assuming userspace has taken account of
the alignment offset consistently.
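
Something like this, purely as a sketch (the 'start' field is
hypothetical, not in the current dm.h):

	struct dm_dev_internal {
		struct list_head list;
		atomic_t count;
		struct dm_dev dm_dev;
		sector_t start;	/* hypothetical: table offset at first use */
	};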

> 	       	  -> blk_stack_limits(ti, bdev->q->limits)
> 		     [NOTE: changed to: ti->limits = q->limits below]
> 		     [NOTE: this copy can't be delayed, need
> 		      access to target's bdev; only available through
> 		      ctr's params]

See above.

> -	if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
> -		DMWARN("%s: target device %s is misaligned",
> -		       dm_device_name(ti->table->md), bdevname(bdev, b));

Userspace code is responsible for avoiding these cases, but we could now
move the misalignment warnings (without failing) to dm_table_set_restrictions().

> +	 * Each target device in the table has a data area that is aligned
> +	 * (via LVM2) so the DM device's alignment_offset should be 0.

There are userspace users other than LVM2 :-)
Use dm_table_get_devices() here with the offset stored earlier instead,
removing struct queue_limits from struct dm_table.

Alasdair

* Re: block: Introduce helper to reset queue limits to default values
  2009-06-12  4:58                 ` block: Introduce helper to reset queue limits to default values Martin K. Petersen
@ 2009-06-15 14:56                   ` Mike Snitzer
  2009-06-15 18:44                     ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Snitzer @ 2009-06-15 14:56 UTC (permalink / raw)
  To: jens.axboe; +Cc: device-mapper development, Alasdair G. Kergon, martin.petersen

On Fri, Jun 12 2009 at 12:58am -0400,
Martin K. Petersen <martin.petersen@oracle.com> wrote:

> 
> DM reuses the request queue when swapping in a new device table.
> Introduce blk_set_default_limits() which can be used to reset the
> queue_limits prior to stacking devices.
> 
> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
> Acked-by: Alasdair G Kergon <agk@redhat.com>

Jens,

Do you intend to provide this patch in your next push to Linus for
2.6.31?  I have a dependency on this patch for DM topology support.

Acked-by: Mike Snitzer <snitzer@redhat.com>

* Re: block: Introduce helper to reset queue limits to default values
  2009-06-15 14:56                   ` Mike Snitzer
@ 2009-06-15 18:44                     ` Jens Axboe
  0 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2009-06-15 18:44 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: device-mapper development, Alasdair G. Kergon, martin.petersen

On Mon, Jun 15 2009, Mike Snitzer wrote:
> On Fri, Jun 12 2009 at 12:58am -0400,
> Martin K. Petersen <martin.petersen@oracle.com> wrote:
> 
> > 
> > DM reuses the request queue when swapping in a new device table.
> > Introduce blk_set_default_limits() which can be used to reset the
> > queue_limits prior to stacking devices.
> > 
> > Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
> > Acked-by: Alasdair G Kergon <agk@redhat.com>
> 
> Jens,
> 
> Do you intend to provide this patch in your next push to Linus for
> 2.6.31?  I have a dependency on this patch for DM topology support.
> 
> Acked-by: Mike Snitzer <snitzer@redhat.com>

I'll add it for the next 2.6.31 push.

-- 
Jens Axboe
