linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
@ 2016-07-26  5:19 Bob Liu
  2016-07-26  5:19 ` [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Bob Liu @ 2016-07-26  5:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: xen-devel, konrad.wilk, roger.pau, Bob Liu

Two places didn't get updated when 64KB page granularity was introduced; this
patch fixes them.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
---
 drivers/block/xen-blkfront.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index fcc5b4e..032fc94 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1321,7 +1321,7 @@ free_shadow:
 			rinfo->ring_ref[i] = GRANT_INVALID_REF;
 		}
 	}
-	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
 	rinfo->ring.sring = NULL;
 
 	if (rinfo->irq)
@@ -2013,7 +2013,7 @@ static int blkif_recover(struct blkfront_info *info)
 
 	blkfront_gather_backend_features(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	blk_queue_max_segments(info->rq, segs);
+	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
 
 	for (r_index = 0; r_index < info->nr_rings; r_index++) {
 		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits()
  2016-07-26  5:19 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
@ 2016-07-26  5:19 ` Bob Liu
  2016-07-27 10:48   ` Roger Pau Monné
  2016-07-26  5:19 ` [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
  2016-07-28  1:19 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Konrad Rzeszutek Wilk
  2 siblings, 1 reply; 9+ messages in thread
From: Bob Liu @ 2016-07-26  5:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: xen-devel, konrad.wilk, roger.pau, Bob Liu

blk_mq_update_nr_hw_queues() resets all queue limits to their defaults, which
is not what xen-blkfront expects. Introduce blkif_set_queue_limits() to restore
the limits to their correct initial values.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
v2: Move blkif_set_queue_limits() after blkfront_gather_backend_features.
---
 drivers/block/xen-blkfront.c |   87 +++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 032fc94..1b4c380 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -189,6 +189,8 @@ struct blkfront_info
 	struct mutex mutex;
 	struct xenbus_device *xbdev;
 	struct gendisk *gd;
+	u16 sector_size;
+	unsigned int physical_sector_size;
 	int vdevice;
 	blkif_vdev_t handle;
 	enum blkif_state connected;
@@ -913,9 +915,45 @@ static struct blk_mq_ops blkfront_mq_ops = {
 	.map_queue = blk_mq_map_queue,
 };
 
+static void blkif_set_queue_limits(struct blkfront_info *info)
+{
+	struct request_queue *rq = info->rq;
+	struct gendisk *gd = info->gd;
+	unsigned int segments = info->max_indirect_segments ? :
+				BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+
+	if (info->feature_discard) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+		blk_queue_max_discard_sectors(rq, get_capacity(gd));
+		rq->limits.discard_granularity = info->discard_granularity;
+		rq->limits.discard_alignment = info->discard_alignment;
+		if (info->feature_secdiscard)
+			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
+	}
+
+	/* Hard sector size and max sectors impersonate the equiv. hardware. */
+	blk_queue_logical_block_size(rq, info->sector_size);
+	blk_queue_physical_block_size(rq, info->physical_sector_size);
+	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+
+	/* Each segment in a request is up to an aligned page in size. */
+	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+	blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+	/* Ensure a merged request will fit in a single I/O ring slot. */
+	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+
+	/* Make sure buffer addresses are sector-aligned. */
+	blk_queue_dma_alignment(rq, 511);
+
+	/* Make sure we don't use bounce buffers. */
+	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
+}
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
-				unsigned int physical_sector_size,
-				unsigned int segments)
+				unsigned int physical_sector_size)
 {
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
@@ -947,37 +985,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	}
 
 	rq->queuedata = info;
-	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
-
-	if (info->feature_discard) {
-		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
-		blk_queue_max_discard_sectors(rq, get_capacity(gd));
-		rq->limits.discard_granularity = info->discard_granularity;
-		rq->limits.discard_alignment = info->discard_alignment;
-		if (info->feature_secdiscard)
-			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
-	}
-
-	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_logical_block_size(rq, sector_size);
-	blk_queue_physical_block_size(rq, physical_sector_size);
-	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
-
-	/* Each segment in a request is up to an aligned page in size. */
-	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
-	blk_queue_max_segment_size(rq, PAGE_SIZE);
-
-	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
-
-	/* Make sure buffer addresses are sector-aligned. */
-	blk_queue_dma_alignment(rq, 511);
-
-	/* Make sure we don't use bounce buffers. */
-	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
-
-	gd->queue = rq;
-
+	info->rq = gd->queue = rq;
+	info->gd = gd;
+	info->sector_size = sector_size;
+	info->physical_sector_size = physical_sector_size;
+	blkif_set_queue_limits(info);
 	return 0;
 }
 
@@ -1142,16 +1154,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
-				 info->max_indirect_segments ? :
-				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
 		del_gendisk(gd);
 		goto release;
 	}
 
-	info->rq = gd->queue;
-	info->gd = gd;
-
 	xlvbd_flush(info);
 
 	if (vdisk_info & VDISK_READONLY)
@@ -2012,6 +2019,8 @@ static int blkif_recover(struct blkfront_info *info)
 	struct split_bio *split_bio;
 
 	blkfront_gather_backend_features(info);
+	/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
+	blkif_set_queue_limits(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
 	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources
  2016-07-26  5:19 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
  2016-07-26  5:19 ` [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
@ 2016-07-26  5:19 ` Bob Liu
  2016-07-26  8:44   ` Roger Pau Monné
  2016-07-28  1:19 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Konrad Rzeszutek Wilk
  2 siblings, 1 reply; 9+ messages in thread
From: Bob Liu @ 2016-07-26  5:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: xen-devel, konrad.wilk, roger.pau, Bob Liu

The current VBD layer reserves buffer space for each attached device based on
three statically configured settings which are read at boot time.
 * max_indirect_segs: Maximum amount of segments.
 * max_ring_page_order: Maximum order of pages to be used for the shared ring.
 * max_queues: Maximum of queues(rings) to be used.

But the storage backend, workload, and guest memory result in very different
tuning requirements. It's impossible to centrally predict application
characteristics, so it's best to allow these settings to be dynamically
adjusted based on the workload inside the guest.

Usage:
Show current values:
cat /sys/devices/vbd-xxx/max_indirect_segs
cat /sys/devices/vbd-xxx/max_ring_page_order
cat /sys/devices/vbd-xxx/max_queues

Write new values:
echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
echo <new value> > /sys/devices/vbd-xxx/max_queues

Signed-off-by: Bob Liu <bob.liu@oracle.com>
--
v2: Rename to max_ring_page_order and rm the waiting code suggested by Roger.
---
 drivers/block/xen-blkfront.c |  275 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 269 insertions(+), 6 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 1b4c380..ff5ebe5 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -212,6 +212,11 @@ struct blkfront_info
 	/* Save uncomplete reqs and bios for migration. */
 	struct list_head requests;
 	struct bio_list bio_list;
+	/* For dynamic configuration. */
+	unsigned int reconfiguring:1;
+	int new_max_indirect_segments;
+	int max_ring_page_order;
+	int max_queues;
 };
 
 static unsigned int nr_minors;
@@ -1350,6 +1355,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	for (i = 0; i < info->nr_rings; i++)
 		blkif_free_ring(&info->rinfo[i]);
 
+	/* Remove old xenstore nodes. */
+	if (info->nr_ring_pages > 1)
+		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
+
+	if (info->nr_rings == 1) {
+		if (info->nr_ring_pages == 1) {
+			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
+		} else {
+			for (i = 0; i < info->nr_ring_pages; i++) {
+				char ring_ref_name[RINGREF_NAME_LEN];
+
+				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
+			}
+		}
+	} else {
+		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
+
+		for (i = 0; i < info->nr_rings; i++) {
+			char queuename[QUEUE_NAME_LEN];
+
+			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
+			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
+		}
+	}
 	kfree(info->rinfo);
 	info->rinfo = NULL;
 	info->nr_rings = 0;
@@ -1763,15 +1793,21 @@ static int talk_to_blkback(struct xenbus_device *dev,
 	const char *message = NULL;
 	struct xenbus_transaction xbt;
 	int err;
-	unsigned int i, max_page_order = 0;
+	unsigned int i, backend_max_order = 0;
 	unsigned int ring_page_order = 0;
 
 	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
-			   "max-ring-page-order", "%u", &max_page_order);
+			   "max-ring-page-order", "%u", &backend_max_order);
 	if (err != 1)
 		info->nr_ring_pages = 1;
 	else {
-		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+		if (info->max_ring_page_order) {
+			/* Dynamic configured through /sys. */
+			BUG_ON(info->max_ring_page_order > backend_max_order);
+			ring_page_order = info->max_ring_page_order;
+		} else
+			/* Default. */
+			ring_page_order = min(xen_blkif_max_ring_order, backend_max_order);
 		info->nr_ring_pages = 1 << ring_page_order;
 	}
 
@@ -1894,7 +1930,14 @@ static int negotiate_mq(struct blkfront_info *info)
 	if (err < 0)
 		backend_max_queues = 1;
 
-	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+	if (info->max_queues) {
+		/* Dynamic configured through /sys */
+		BUG_ON(info->max_queues > backend_max_queues);
+		info->nr_rings = info->max_queues;
+	} else
+		/* Default. */
+		info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+
 	/* We need at least one ring. */
 	if (!info->nr_rings)
 		info->nr_rings = 1;
@@ -2352,11 +2395,197 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
 			    NULL);
 	if (err)
 		info->max_indirect_segments = 0;
-	else
+	else {
 		info->max_indirect_segments = min(indirect_segments,
 						  xen_blkif_max_segments);
+		if (info->new_max_indirect_segments) {
+			BUG_ON(info->new_max_indirect_segments > indirect_segments);
+			info->max_indirect_segments = info->new_max_indirect_segments;
+		}
+	}
+}
+
+static ssize_t max_ring_page_order_show(struct device *dev,
+					struct device_attribute *attr, char *page)
+{
+	struct blkfront_info *info = dev_get_drvdata(dev);
+
+	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
 }
 
+static ssize_t max_indirect_segs_show(struct device *dev,
+				      struct device_attribute *attr, char *page)
+{
+	struct blkfront_info *info = dev_get_drvdata(dev);
+
+	return sprintf(page, "%u\n", info->max_indirect_segments);
+}
+
+static ssize_t max_queues_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct blkfront_info *info = dev_get_drvdata(dev);
+
+	return sprintf(page, "%u\n", info->nr_rings);
+}
+
+static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
+{
+	/*
+	 * Prevent new requests even to software request queue.
+	 */
+	blk_mq_freeze_queue(info->rq);
+
+	/*
+	 * Guarantee no uncompleted reqs.
+	 */
+	if (part_in_flight(&info->gd->part0) || info->reconfiguring) {
+		blk_mq_unfreeze_queue(info->rq);
+		pr_err("Dev:%s busy, please retry later.\n", dev_name(&info->xbdev->dev));
+		return -EBUSY;
+	}
+
+	/*
+	 * Frontend: 				Backend:
+	 * Freeze_queue()
+	 * Switch to XenbusStateClosed
+	 *					frontend_changed(StateClosed)
+	 *						> xen_blkif_disconnect()
+	 *						> Switch to XenbusStateClosed
+	 * blkback_changed(StateClosed)
+	 *	> blkfront_resume()
+	 *	> Switch to StateInitialised
+	 *					frontend_changed(StateInitialised):
+	 *						> reconnect
+	 *						> Switch to StateConnected
+	 * blkback_changed(StateConnected)
+	 *	> blkif_recover()
+	 *		> Also switch to StateConnected
+	 *		> Unfreeze_queue()
+	 */
+	info->reconfiguring = true;
+	xenbus_switch_state(info->xbdev, XenbusStateClosed);
+
+	return count;
+}
+
+static ssize_t max_indirect_segs_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	ssize_t ret;
+	unsigned int max_segs = 0, backend_max_segs = 0;
+	struct blkfront_info *info = dev_get_drvdata(dev);
+	int err;
+
+	ret = kstrtouint(buf, 10, &max_segs);
+	if (ret < 0)
+		return ret;
+
+	if (max_segs == info->max_indirect_segments)
+		return count;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-max-indirect-segments", "%u", &backend_max_segs,
+			    NULL);
+	if (err) {
+		pr_err("Backend %s doesn't support feature-indirect-segments.\n",
+			info->xbdev->otherend);
+		return -EOPNOTSUPP;
+	}
+
+	if (max_segs > backend_max_segs) {
+		pr_err("Invalid max indirect segment (%u), backend-max: %u.\n",
+			max_segs, backend_max_segs);
+		return -EINVAL;
+	}
+
+	info->new_max_indirect_segments = max_segs;
+
+	return dynamic_reconfig_device(info, count);
+}
+
+static ssize_t max_ring_page_order_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	ssize_t ret;
+	unsigned int max_order = 0, backend_max_order = 0;
+	struct blkfront_info *info = dev_get_drvdata(dev);
+	int err;
+
+	ret = kstrtouint(buf, 10, &max_order);
+	if (ret < 0)
+		return ret;
+
+	if ((1 << max_order) == info->nr_ring_pages)
+		return count;
+
+	if (max_order > XENBUS_MAX_RING_GRANT_ORDER) {
+		pr_err("Invalid max_ring_page_order (%u), max: %u.\n",
+				max_order, XENBUS_MAX_RING_GRANT_ORDER);
+		return -EINVAL;
+	}
+
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "max-ring-page-order", "%u", &backend_max_order);
+	if (err != 1) {
+		pr_err("Backend %s doesn't support feature multi-page-ring.\n",
+			info->xbdev->otherend);
+		return -EOPNOTSUPP;
+	}
+	if (max_order > backend_max_order) {
+		pr_err("Invalid max_ring_page_order (%u), backend supports max: %u.\n",
+			max_order, backend_max_order);
+		return -EINVAL;
+	}
+	info->max_ring_page_order = max_order;
+
+	return dynamic_reconfig_device(info, count);
+}
+
+static ssize_t max_queues_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	ssize_t ret;
+	unsigned int max_queues = 0, backend_max_queues = 0;
+	struct blkfront_info *info = dev_get_drvdata(dev);
+	int err;
+
+	ret = kstrtouint(buf, 10, &max_queues);
+	if (ret < 0)
+		return ret;
+
+	if (max_queues == info->nr_rings)
+		return count;
+
+	if (max_queues > num_online_cpus()) {
+		pr_err("Invalid max_queues (%u), can't bigger than online cpus: %u.\n",
+			max_queues, num_online_cpus());
+		return -EINVAL;
+	}
+
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "multi-queue-max-queues", "%u", &backend_max_queues);
+	if (err != 1) {
+		pr_err("Backend %s doesn't support block multi queue.\n",
+			info->xbdev->otherend);
+		return -EOPNOTSUPP;
+	}
+	if (max_queues > backend_max_queues) {
+		pr_err("Invalid max_queues (%u), backend supports max: %u.\n",
+			max_queues, backend_max_queues);
+		return -EINVAL;
+	}
+	info->max_queues = max_queues;
+
+	return dynamic_reconfig_device(info, count);
+}
+
+static DEVICE_ATTR_RW(max_queues);
+static DEVICE_ATTR_RW(max_ring_page_order);
+static DEVICE_ATTR_RW(max_indirect_segs);
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -2393,6 +2622,10 @@ static void blkfront_connect(struct blkfront_info *info)
 		 * supports indirect descriptors, and how many.
 		 */
 		blkif_recover(info);
+		if (info->reconfiguring) {
+			blk_mq_unfreeze_queue(info->rq);
+			info->reconfiguring = false;
+		}
 		return;
 
 	default:
@@ -2443,6 +2676,22 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}
 
+	err = device_create_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+	if (err)
+		goto fail;
+
+	err = device_create_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
+	if (err) {
+		device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+		goto fail;
+	}
+
+	err = device_create_file(&info->xbdev->dev, &dev_attr_max_queues);
+	if (err) {
+		device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+		device_remove_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
+		goto fail;
+	}
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	/* Kick pending requests. */
@@ -2453,6 +2702,12 @@ static void blkfront_connect(struct blkfront_info *info)
 	add_disk(info->gd);
 
 	info->is_ready = 1;
+	return;
+
+fail:
+	blkif_free(info, 0);
+	xlvbd_release_gendisk(info);
+	return;
 }
 
 /**
@@ -2500,8 +2755,16 @@ static void blkback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateClosed:
-		if (dev->state == XenbusStateClosed)
+		if (dev->state == XenbusStateClosed) {
+			if (info->reconfiguring)
+				if (blkfront_resume(info->xbdev)) {
+					/* Resume failed. */
+					info->reconfiguring = false;
+					xenbus_switch_state(info->xbdev, XenbusStateClosed);
+					pr_err("Resume from dynamic configuration failed\n");
+				}
 			break;
+		}
 		/* Missed the backend's Closing state -- fallthrough */
 	case XenbusStateClosing:
 		if (info)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources
  2016-07-26  5:19 ` [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
@ 2016-07-26  8:44   ` Roger Pau Monné
  2016-07-26  8:58     ` Bob Liu
  0 siblings, 1 reply; 9+ messages in thread
From: Roger Pau Monné @ 2016-07-26  8:44 UTC (permalink / raw)
  To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk

On Tue, Jul 26, 2016 at 01:19:37PM +0800, Bob Liu wrote:
> The current VBD layer reserves buffer space for each attached device based on
> three statically configured settings which are read at boot time.
>  * max_indirect_segs: Maximum amount of segments.
>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>  * max_queues: Maximum of queues(rings) to be used.
> 
> But the storage backend, workload, and guest memory result in very different
> tuning requirements. It's impossible to centrally predict application
> characteristics so it's best to leave allow the settings can be dynamiclly
> adjusted based on workload inside the Guest.
> 
> Usage:
> Show current values:
> cat /sys/devices/vbd-xxx/max_indirect_segs
> cat /sys/devices/vbd-xxx/max_ring_page_order
> cat /sys/devices/vbd-xxx/max_queues
> 
> Write new values:
> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> echo <new value> > /sys/devices/vbd-xxx/max_queues
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> --
> v2: Rename to max_ring_page_order and rm the waiting code suggested by Roger.
> ---
>  drivers/block/xen-blkfront.c |  275 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 269 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 1b4c380..ff5ebe5 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -212,6 +212,11 @@ struct blkfront_info
>  	/* Save uncomplete reqs and bios for migration. */
>  	struct list_head requests;
>  	struct bio_list bio_list;
> +	/* For dynamic configuration. */
> +	unsigned int reconfiguring:1;
> +	int new_max_indirect_segments;

Can't you just use max_indirect_segments? Is it really needed to introduce a 
new struct member?

> +	int max_ring_page_order;
> +	int max_queues;
>  };
>  
>  static unsigned int nr_minors;
> @@ -1350,6 +1355,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>  	for (i = 0; i < info->nr_rings; i++)
>  		blkif_free_ring(&info->rinfo[i]);
>  
> +	/* Remove old xenstore nodes. */
> +	if (info->nr_ring_pages > 1)
> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
> +
> +	if (info->nr_rings == 1) {
> +		if (info->nr_ring_pages == 1) {
> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
> +		} else {
> +			for (i = 0; i < info->nr_ring_pages; i++) {
> +				char ring_ref_name[RINGREF_NAME_LEN];
> +
> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
> +			}
> +		}
> +	} else {
> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
> +
> +		for (i = 0; i < info->nr_rings; i++) {
> +			char queuename[QUEUE_NAME_LEN];
> +
> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
> +		}
> +	}
>  	kfree(info->rinfo);
>  	info->rinfo = NULL;
>  	info->nr_rings = 0;
> @@ -1763,15 +1793,21 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  	const char *message = NULL;
>  	struct xenbus_transaction xbt;
>  	int err;
> -	unsigned int i, max_page_order = 0;
> +	unsigned int i, backend_max_order = 0;
>  	unsigned int ring_page_order = 0;
>  
>  	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
> -			   "max-ring-page-order", "%u", &max_page_order);
> +			   "max-ring-page-order", "%u", &backend_max_order);
>  	if (err != 1)
>  		info->nr_ring_pages = 1;
>  	else {
> -		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
> +		if (info->max_ring_page_order) {
> +			/* Dynamic configured through /sys. */
> +			BUG_ON(info->max_ring_page_order > backend_max_order);

Maybe I'm missing something, but I don't think you can BUG here. The 
following flow for example will trigger this BUG:

 - Admin sets max_ring_page_order = 2 from sysfs, frontend reconfigures.
 - VM is migrated to a new host without multipage ring support.
 - BUG will trigger on reconnection (because backend_max_order == 1 and 
   max_ring_page_order == 2).

Roger.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources
  2016-07-26  8:44   ` Roger Pau Monné
@ 2016-07-26  8:58     ` Bob Liu
  2016-07-26 15:48       ` Roger Pau Monné
  0 siblings, 1 reply; 9+ messages in thread
From: Bob Liu @ 2016-07-26  8:58 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: linux-kernel, xen-devel, konrad.wilk


On 07/26/2016 04:44 PM, Roger Pau Monné wrote:
> On Tue, Jul 26, 2016 at 01:19:37PM +0800, Bob Liu wrote:
>> The current VBD layer reserves buffer space for each attached device based on
>> three statically configured settings which are read at boot time.
>>  * max_indirect_segs: Maximum amount of segments.
>>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>>  * max_queues: Maximum of queues(rings) to be used.
>>
>> But the storage backend, workload, and guest memory result in very different
>> tuning requirements. It's impossible to centrally predict application
>> characteristics so it's best to leave allow the settings can be dynamiclly
>> adjusted based on workload inside the Guest.
>>
>> Usage:
>> Show current values:
>> cat /sys/devices/vbd-xxx/max_indirect_segs
>> cat /sys/devices/vbd-xxx/max_ring_page_order
>> cat /sys/devices/vbd-xxx/max_queues
>>
>> Write new values:
>> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
>> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
>> echo <new value> > /sys/devices/vbd-xxx/max_queues
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> --
>> v2: Rename to max_ring_page_order and rm the waiting code suggested by Roger.
>> ---
>>  drivers/block/xen-blkfront.c |  275 +++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 269 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 1b4c380..ff5ebe5 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -212,6 +212,11 @@ struct blkfront_info
>>  	/* Save uncomplete reqs and bios for migration. */
>>  	struct list_head requests;
>>  	struct bio_list bio_list;
>> +	/* For dynamic configuration. */
>> +	unsigned int reconfiguring:1;
>> +	int new_max_indirect_segments;
> 
> Can't you just use max_indirect_segments? Is it really needed to introduce a 
> new struct member?
> 
>> +	int max_ring_page_order;
>> +	int max_queues;

Do you mean also get rid of these two new struct members?
I'll think about that.

>>  };
>>  
>>  static unsigned int nr_minors;
>> @@ -1350,6 +1355,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>>  	for (i = 0; i < info->nr_rings; i++)
>>  		blkif_free_ring(&info->rinfo[i]);
>>  
>> +	/* Remove old xenstore nodes. */
>> +	if (info->nr_ring_pages > 1)
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
>> +
>> +	if (info->nr_rings == 1) {
>> +		if (info->nr_ring_pages == 1) {
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
>> +		} else {
>> +			for (i = 0; i < info->nr_ring_pages; i++) {
>> +				char ring_ref_name[RINGREF_NAME_LEN];
>> +
>> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
>> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
>> +			}
>> +		}
>> +	} else {
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
>> +
>> +		for (i = 0; i < info->nr_rings; i++) {
>> +			char queuename[QUEUE_NAME_LEN];
>> +
>> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
>> +		}
>> +	}
>>  	kfree(info->rinfo);
>>  	info->rinfo = NULL;
>>  	info->nr_rings = 0;
>> @@ -1763,15 +1793,21 @@ static int talk_to_blkback(struct xenbus_device *dev,
>>  	const char *message = NULL;
>>  	struct xenbus_transaction xbt;
>>  	int err;
>> -	unsigned int i, max_page_order = 0;
>> +	unsigned int i, backend_max_order = 0;
>>  	unsigned int ring_page_order = 0;
>>  
>>  	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
>> -			   "max-ring-page-order", "%u", &max_page_order);
>> +			   "max-ring-page-order", "%u", &backend_max_order);
>>  	if (err != 1)
>>  		info->nr_ring_pages = 1;
>>  	else {
>> -		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
>> +		if (info->max_ring_page_order) {
>> +			/* Dynamic configured through /sys. */
>> +			BUG_ON(info->max_ring_page_order > backend_max_order);
> 
> Maybe I'm missing something, but I don't think you can BUG here. The 
> following flow for example will trigger this BUG:
> 

You are right, this BUG will be triggered after removing the waiting code in this version.
Will be updated.

>  - Admin sets max_ring_page_order = 2 from sysfs, frontend reconfigures.
>  - VM is migrated to a new host without multipage ring support.
>  - BUG will trigger on reconnection (because backend_max_order == 1 and 
>    max_ring_page_order == 2).
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources
  2016-07-26  8:58     ` Bob Liu
@ 2016-07-26 15:48       ` Roger Pau Monné
  0 siblings, 0 replies; 9+ messages in thread
From: Roger Pau Monné @ 2016-07-26 15:48 UTC (permalink / raw)
  To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk

On Tue, Jul 26, 2016 at 04:58:10PM +0800, Bob Liu wrote:
> 
> On 07/26/2016 04:44 PM, Roger Pau Monné wrote:
> > On Tue, Jul 26, 2016 at 01:19:37PM +0800, Bob Liu wrote:
> >> The current VBD layer reserves buffer space for each attached device based on
> >> three statically configured settings which are read at boot time.
> >>  * max_indirect_segs: Maximum amount of segments.
> >>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
> >>  * max_queues: Maximum of queues(rings) to be used.
> >>
> >> But the storage backend, workload, and guest memory result in very different
> >> tuning requirements. It's impossible to centrally predict application
> >> characteristics so it's best to leave allow the settings can be dynamiclly
> >> adjusted based on workload inside the Guest.
> >>
> >> Usage:
> >> Show current values:
> >> cat /sys/devices/vbd-xxx/max_indirect_segs
> >> cat /sys/devices/vbd-xxx/max_ring_page_order
> >> cat /sys/devices/vbd-xxx/max_queues
> >>
> >> Write new values:
> >> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> >> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> >> echo <new value> > /sys/devices/vbd-xxx/max_queues
> >>
> >> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> >> --
> >> v2: Rename to max_ring_page_order and rm the waiting code suggested by Roger.
> >> ---
> >>  drivers/block/xen-blkfront.c |  275 +++++++++++++++++++++++++++++++++++++++++-
> >>  1 file changed, 269 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> >> index 1b4c380..ff5ebe5 100644
> >> --- a/drivers/block/xen-blkfront.c
> >> +++ b/drivers/block/xen-blkfront.c
> >> @@ -212,6 +212,11 @@ struct blkfront_info
> >>  	/* Save uncomplete reqs and bios for migration. */
> >>  	struct list_head requests;
> >>  	struct bio_list bio_list;
> >> +	/* For dynamic configuration. */
> >> +	unsigned int reconfiguring:1;
> >> +	int new_max_indirect_segments;
> > 
> > Can't you just use max_indirect_segments? Is it really needed to introduce a 
> > new struct member?
> > 
> >> +	int max_ring_page_order;
> >> +	int max_queues;
> 
> Do you mean also get rid of these two new struct members?
> I'll think about that.

Oh no, those two are fine, and AFAICT are needed because now every blkfront 
instance can have its own max number of queues or ring pages. What I think 
can be removed is the introduction of new_max_indirect_segments, and instead 
just use the already available max_indirect_segments field in that same 
struct.

Roger.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits()
  2016-07-26  5:19 ` [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
@ 2016-07-27 10:48   ` Roger Pau Monné
  0 siblings, 0 replies; 9+ messages in thread
From: Roger Pau Monné @ 2016-07-27 10:48 UTC (permalink / raw)
  To: Bob Liu; +Cc: linux-kernel, xen-devel, konrad.wilk

On Tue, Jul 26, 2016 at 01:19:36PM +0800, Bob Liu wrote:
> blk_mq_update_nr_hw_queues() resets all queue limits to their defaults, which
> is not what xen-blkfront expects; introduce blkif_set_queue_limits() to reset
> the limits to the initial correct values.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Roger.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
  2016-07-26  5:19 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
  2016-07-26  5:19 ` [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
  2016-07-26  5:19 ` [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
@ 2016-07-28  1:19 ` Konrad Rzeszutek Wilk
  2016-07-28  9:03   ` Bob Liu
  2 siblings, 1 reply; 9+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-07-28  1:19 UTC (permalink / raw)
  To: Bob Liu; +Cc: linux-kernel, xen-devel, roger.pau

On Tue, Jul 26, 2016 at 01:19:35PM +0800, Bob Liu wrote:
> Two places didn't get updated when 64KB page granularity was introduced; this
> patch fixes them.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Could you rebase this on xen-tip/for-linus-4.8 pls?

> ---
>  drivers/block/xen-blkfront.c |    4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index fcc5b4e..032fc94 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -1321,7 +1321,7 @@ free_shadow:
>  			rinfo->ring_ref[i] = GRANT_INVALID_REF;
>  		}
>  	}
> -	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
> +	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
>  	rinfo->ring.sring = NULL;
>  
>  	if (rinfo->irq)
> @@ -2013,7 +2013,7 @@ static int blkif_recover(struct blkfront_info *info)
>  
>  	blkfront_gather_backend_features(info);
>  	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
> -	blk_queue_max_segments(info->rq, segs);
> +	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
>  
>  	for (r_index = 0; r_index < info->nr_rings; r_index++) {
>  		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
> -- 
> 1.7.10.4
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
  2016-07-28  1:19 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Konrad Rzeszutek Wilk
@ 2016-07-28  9:03   ` Bob Liu
  0 siblings, 0 replies; 9+ messages in thread
From: Bob Liu @ 2016-07-28  9:03 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: linux-kernel, xen-devel, roger.pau


On 07/28/2016 09:19 AM, Konrad Rzeszutek Wilk wrote:
> On Tue, Jul 26, 2016 at 01:19:35PM +0800, Bob Liu wrote:
>> Two places didn't get updated when 64KB page granularity was introduced; this
>> patch fixes them.
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Could you rebase this on xen-tip/for-linus-4.8 pls?

Done, sent the v2 for you to pick up.

> 
>> ---
>>  drivers/block/xen-blkfront.c |    4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index fcc5b4e..032fc94 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -1321,7 +1321,7 @@ free_shadow:
>>  			rinfo->ring_ref[i] = GRANT_INVALID_REF;
>>  		}
>>  	}
>> -	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
>> +	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
>>  	rinfo->ring.sring = NULL;
>>  
>>  	if (rinfo->irq)
>> @@ -2013,7 +2013,7 @@ static int blkif_recover(struct blkfront_info *info)
>>  
>>  	blkfront_gather_backend_features(info);
>>  	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
>> -	blk_queue_max_segments(info->rq, segs);
>> +	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
>>  
>>  	for (r_index = 0; r_index < info->nr_rings; r_index++) {
>>  		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
>> -- 
>> 1.7.10.4
>>

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-07-28  9:04 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-26  5:19 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
2016-07-26  5:19 ` [PATCH v2 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
2016-07-27 10:48   ` Roger Pau Monné
2016-07-26  5:19 ` [PATCH v2 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
2016-07-26  8:44   ` Roger Pau Monné
2016-07-26  8:58     ` Bob Liu
2016-07-26 15:48       ` Roger Pau Monné
2016-07-28  1:19 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Konrad Rzeszutek Wilk
2016-07-28  9:03   ` Bob Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).