xen-devel.lists.xenproject.org archive mirror
* [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
@ 2016-07-15  9:31 Bob Liu
  2016-07-15  9:31 ` [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
                   ` (4 more replies)
  0 siblings, 5 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-15  9:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: jgross, xen-devel, Bob Liu, roger.pau

Two places didn't get updated when 64KB page granularity was introduced; this
patch fixes them.
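
For reference, the two page granularities relate as sketched below (based on
the definitions in include/xen/page.h and xen-blkfront.c; XEN_PAGE_SIZE is
the fixed 4KB grant/ring granularity, while PAGE_SIZE may be 64KB, e.g. on
arm64):

    #define XEN_PAGE_SIZE   (1UL << XEN_PAGE_SHIFT)     /* always 4KB */
    #define GRANTS_PER_PSEG (PAGE_SIZE / XEN_PAGE_SIZE) /* 16 with 64KB pages */

So the shared ring must be sized in XEN_PAGE_SIZE units, and segment counts
must be scaled by GRANTS_PER_PSEG, which is what the two hunks below do.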

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkfront.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index fcc5b4e..032fc94 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1321,7 +1321,7 @@ free_shadow:
 			rinfo->ring_ref[i] = GRANT_INVALID_REF;
 		}
 	}
-	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
 	rinfo->ring.sring = NULL;
 
 	if (rinfo->irq)
@@ -2013,7 +2013,7 @@ static int blkif_recover(struct blkfront_info *info)
 
 	blkfront_gather_backend_features(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	blk_queue_max_segments(info->rq, segs);
+	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
 
 	for (r_index = 0; r_index < info->nr_rings; r_index++) {
 		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
-- 
2.7.4



* [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits()
  2016-07-15  9:31 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
@ 2016-07-15  9:31 ` Bob Liu
  2016-07-15  9:31 ` [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-15  9:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: jgross, xen-devel, Bob Liu, roger.pau

blk_mq_update_nr_hw_queues() resets all queue limits to their defaults, which
is not what xen-blkfront expects. Introduce blkif_set_queue_limits() so that
the limits can be restored to their correct initial values.
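
In other words, the limits must be re-applied after every call to
blk_mq_update_nr_hw_queues(); a minimal sketch of the intended resume flow,
using the helper introduced below:

    err = talk_to_blkback(dev, info);
    if (!err) {
        blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
        /* blk-mq has just reset the queue limits; restore ours. */
        blkif_set_queue_limits(info);
    }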

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkfront.c | 91 ++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 41 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 032fc94..10f46a8 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -189,6 +189,8 @@ struct blkfront_info
 	struct mutex mutex;
 	struct xenbus_device *xbdev;
 	struct gendisk *gd;
+	u16 sector_size;
+	unsigned int physical_sector_size;
 	int vdevice;
 	blkif_vdev_t handle;
 	enum blkif_state connected;
@@ -913,9 +915,45 @@ static struct blk_mq_ops blkfront_mq_ops = {
 	.map_queue = blk_mq_map_queue,
 };
 
+static void blkif_set_queue_limits(struct blkfront_info *info)
+{
+	struct request_queue *rq = info->rq;
+	struct gendisk *gd = info->gd;
+	unsigned int segments = info->max_indirect_segments ? :
+				BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+
+	if (info->feature_discard) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+		blk_queue_max_discard_sectors(rq, get_capacity(gd));
+		rq->limits.discard_granularity = info->discard_granularity;
+		rq->limits.discard_alignment = info->discard_alignment;
+		if (info->feature_secdiscard)
+			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
+	}
+
+	/* Hard sector size and max sectors impersonate the equiv. hardware. */
+	blk_queue_logical_block_size(rq, info->sector_size);
+	blk_queue_physical_block_size(rq, info->physical_sector_size);
+	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+
+	/* Each segment in a request is up to an aligned page in size. */
+	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+	blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+	/* Ensure a merged request will fit in a single I/O ring slot. */
+	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+
+	/* Make sure buffer addresses are sector-aligned. */
+	blk_queue_dma_alignment(rq, 511);
+
+	/* Make sure we don't use bounce buffers. */
+	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
+}
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
-				unsigned int physical_sector_size,
-				unsigned int segments)
+				unsigned int physical_sector_size)
 {
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
@@ -947,37 +985,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	}
 
 	rq->queuedata = info;
-	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
-
-	if (info->feature_discard) {
-		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
-		blk_queue_max_discard_sectors(rq, get_capacity(gd));
-		rq->limits.discard_granularity = info->discard_granularity;
-		rq->limits.discard_alignment = info->discard_alignment;
-		if (info->feature_secdiscard)
-			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
-	}
-
-	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_logical_block_size(rq, sector_size);
-	blk_queue_physical_block_size(rq, physical_sector_size);
-	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
-
-	/* Each segment in a request is up to an aligned page in size. */
-	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
-	blk_queue_max_segment_size(rq, PAGE_SIZE);
-
-	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
-
-	/* Make sure buffer addresses are sector-aligned. */
-	blk_queue_dma_alignment(rq, 511);
-
-	/* Make sure we don't use bounce buffers. */
-	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
-
-	gd->queue = rq;
-
+	info->rq = gd->queue = rq;
+	info->gd = gd;
+	info->sector_size = sector_size;
+	info->physical_sector_size = physical_sector_size;
+	blkif_set_queue_limits(info);
 	return 0;
 }
 
@@ -1142,16 +1154,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
-				 info->max_indirect_segments ? :
-				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
 		del_gendisk(gd);
 		goto release;
 	}
 
-	info->rq = gd->queue;
-	info->gd = gd;
-
 	xlvbd_flush(info);
 
 	if (vdisk_info & VDISK_READONLY)
@@ -2132,9 +2139,11 @@ static int blkfront_resume(struct xenbus_device *dev)
 		return err;
 
 	err = talk_to_blkback(dev, info);
-	if (!err)
+	if (!err) {
 		blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
-
+		/* Reset limits changed by blk-mq. */
+		blkif_set_queue_limits(info);
+	}
 	/*
 	 * We have to wait for the backend to switch to
 	 * connected state, since we want to read which
-- 
2.7.4



* [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
  2016-07-15  9:31 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
  2016-07-15  9:31 ` [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
@ 2016-07-15  9:31 ` Bob Liu
  2016-07-21  8:06 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Roger Pau Monné
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-15  9:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: jgross, xen-devel, Bob Liu, roger.pau

The current VBD layer reserves buffer space for each attached device based on
three statically configured settings which are read at boot time.
 * max_indirect_segs: Maximum number of indirect segments.
 * max_ring_page_order: Maximum order of pages to be used for the shared ring.
 * max_queues: Maximum number of queues (rings) to be used.

But the storage backend, workload, and guest memory lead to very different
tuning requirements. It's impossible to predict application characteristics
centrally, so it's best to allow these settings to be adjusted dynamically
based on the workload inside the guest.

Usage:
Show current values:
cat /sys/devices/vbd-xxx/max_indirect_segs
cat /sys/devices/vbd-xxx/max_ring_page_order
cat /sys/devices/vbd-xxx/max_queues

Write new values:
echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
echo <new value> > /sys/devices/vbd-xxx/max_queues
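
Each write triggers a reconnect of the device; a simplified outline of the
dynamic_reconfig_device() flow added by this patch (locking, error handling
and timeouts omitted):

    /* Freeze I/O, force a disconnect, then reconnect as on resume. */
    blk_mq_freeze_queue(info->rq);
    info->reconfiguring = true;
    xenbus_switch_state(info->xbdev, XenbusStateClosed);
    /* ... wait for blkback_changed() to clear info->reconfiguring ... */
    blkfront_resume(info->xbdev);
    /* ... wait for the frontend to reach XenbusStateConnected ... */
    blk_mq_unfreeze_queue(info->rq);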

Signed-off-by: Bob Liu <bob.liu@oracle.com>
--
v2: Add device lock and other comments from Konrad.
---
 drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 283 insertions(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 10f46a8..9a5ed22 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -46,6 +46,7 @@
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
 #include <linux/list.h>
+#include <linux/delay.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -212,6 +213,11 @@ struct blkfront_info
 	/* Save uncomplete reqs and bios for migration. */
 	struct list_head requests;
 	struct bio_list bio_list;
+	/* For dynamic configuration. */
+	unsigned int reconfiguring:1;
+	int new_max_indirect_segments;
+	int new_max_ring_page_order;
+	int new_max_queues;
 };
 
 static unsigned int nr_minors;
@@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	for (i = 0; i < info->nr_rings; i++)
 		blkif_free_ring(&info->rinfo[i]);
 
+	/* Remove old xenstore nodes. */
+	if (info->nr_ring_pages > 1)
+		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
+
+	if (info->nr_rings == 1) {
+		if (info->nr_ring_pages == 1) {
+			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
+		} else {
+			for (i = 0; i < info->nr_ring_pages; i++) {
+				char ring_ref_name[RINGREF_NAME_LEN];
+
+				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
+			}
+		}
+	} else {
+		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
+
+		for (i = 0; i < info->nr_rings; i++) {
+			char queuename[QUEUE_NAME_LEN];
+
+			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
+			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
+		}
+	}
 	kfree(info->rinfo);
 	info->rinfo = NULL;
 	info->nr_rings = 0;
@@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
 		info->nr_ring_pages = 1;
 	else {
 		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+		if (info->new_max_ring_page_order) {
+			BUG_ON(info->new_max_ring_page_order > max_page_order);
+			ring_page_order = info->new_max_ring_page_order;
+		}
 		info->nr_ring_pages = 1 << ring_page_order;
 	}
 
@@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
 		backend_max_queues = 1;
 
 	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+	if (info->new_max_queues) {
+		BUG_ON(info->new_max_queues > backend_max_queues);
+		info->nr_rings = info->new_max_queues;
+	}
 	/* We need at least one ring. */
 	if (!info->nr_rings)
 		info->nr_rings = 1;
@@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
 			    NULL);
 	if (err)
 		info->max_indirect_segments = 0;
-	else
+	else {
 		info->max_indirect_segments = min(indirect_segments,
 						  xen_blkif_max_segments);
+		if (info->new_max_indirect_segments) {
+			BUG_ON(info->new_max_indirect_segments > indirect_segments);
+			info->max_indirect_segments = info->new_max_indirect_segments;
+		}
+	}
+}
+
+static ssize_t max_ring_page_order_show(struct device *dev,
+					struct device_attribute *attr, char *page)
+{
+	struct blkfront_info *info = dev_get_drvdata(dev);
+
+	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
+}
+
+static ssize_t max_indirect_segs_show(struct device *dev,
+				      struct device_attribute *attr, char *page)
+{
+	struct blkfront_info *info = dev_get_drvdata(dev);
+
+	return sprintf(page, "%u\n", info->max_indirect_segments);
+}
+
+static ssize_t max_queues_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct blkfront_info *info = dev_get_drvdata(dev);
+
+	return sprintf(page, "%u\n", info->nr_rings);
+}
+
+static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
+{
+	unsigned int i;
+	int err = -EBUSY;
+
+	/*
+	 * Make sure no migration in parallel, device lock is actually a
+	 * mutex.
+	 */
+	if (!device_trylock(&info->xbdev->dev)) {
+		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
+			dev_name(&info->xbdev->dev));
+		return err;
+	}
+
+	/*
+	 * Prevent new requests and guarantee no uncompleted reqs.
+	 */
+	blk_mq_freeze_queue(info->rq);
+	if (part_in_flight(&info->gd->part0))
+		goto out;
+
+	/*
+	 * Front 				Backend
+	 * Switch to XenbusStateClosed
+	 *					frontend_changed():
+	 *					 case XenbusStateClosed:
+	 *						xen_blkif_disconnect()
+	 *						Switch to XenbusStateClosed
+	 * blkfront_resume():
+	 *					frontend_changed():
+	 *						reconnect
+	 * Wait until XenbusStateConnected
+	 */
+	info->reconfiguring = true;
+	xenbus_switch_state(info->xbdev, XenbusStateClosed);
+
+	/* Poll every 100ms, 1 minute timeout. */
+	for (i = 0; i < 600; i++) {
+		/*
+		 * Wait backend enter XenbusStateClosed, blkback_changed()
+		 * will clear reconfiguring.
+		 */
+		if (!info->reconfiguring)
+			goto resume;
+		schedule_timeout_interruptible(msecs_to_jiffies(100));
+	}
+	goto out;
+
+resume:
+	if (blkfront_resume(info->xbdev))
+		goto out;
+
+	/* Poll every 100ms, 1 minute timeout. */
+	for (i = 0; i < 600; i++) {
+		/* Wait blkfront enter StateConnected which is done by blkif_recover(). */
+		if (info->xbdev->state == XenbusStateConnected) {
+			err = count;
+			goto out;
+		}
+		schedule_timeout_interruptible(msecs_to_jiffies(100));
+	}
+
+out:
+	blk_mq_unfreeze_queue(info->rq);
+	device_unlock(&info->xbdev->dev);
+
+	return err;
+}
+
+static ssize_t max_indirect_segs_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	ssize_t ret;
+	unsigned int max_segs = 0, backend_max_segs = 0;
+	struct blkfront_info *info = dev_get_drvdata(dev);
+	int err;
+
+	ret = kstrtouint(buf, 10, &max_segs);
+	if (ret < 0)
+		return ret;
+
+	if (max_segs == info->max_indirect_segments)
+		return count;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-max-indirect-segments", "%u", &backend_max_segs,
+			    NULL);
+	if (err) {
+		pr_err("Backend %s doesn't support feature-indirect-segments.\n",
+			info->xbdev->otherend);
+		return -EOPNOTSUPP;
+	}
+
+	if (max_segs > backend_max_segs) {
+		pr_err("Invalid max indirect segment (%u), backend-max: %u.\n",
+			max_segs, backend_max_segs);
+		return -EINVAL;
+	}
+
+	info->new_max_indirect_segments = max_segs;
+
+	return dynamic_reconfig_device(info, count);
+}
+
+static ssize_t max_ring_page_order_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	ssize_t ret;
+	unsigned int max_order = 0, backend_max_order = 0;
+	struct blkfront_info *info = dev_get_drvdata(dev);
+	int err;
+
+	ret = kstrtouint(buf, 10, &max_order);
+	if (ret < 0)
+		return ret;
+
+	if ((1 << max_order) == info->nr_ring_pages)
+		return count;
+
+	if (max_order > XENBUS_MAX_RING_GRANT_ORDER) {
+		pr_err("Invalid max_ring_page_order (%u), max: %u.\n",
+				max_order, XENBUS_MAX_RING_GRANT_ORDER);
+		return -EINVAL;
+	}
+
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "max-ring-page-order", "%u", &backend_max_order);
+	if (err != 1) {
+		pr_err("Backend %s doesn't support feature multi-page-ring.\n",
+			info->xbdev->otherend);
+		return -EOPNOTSUPP;
+	}
+	if (max_order > backend_max_order) {
+		pr_err("Invalid max_ring_page_order (%u), backend supports max: %u.\n",
+			max_order, backend_max_order);
+		return -EINVAL;
+	}
+	info->new_max_ring_page_order = max_order;
+
+	return dynamic_reconfig_device(info, count);
 }
 
+static ssize_t max_queues_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	ssize_t ret;
+	unsigned int max_queues = 0, backend_max_queues = 0;
+	struct blkfront_info *info = dev_get_drvdata(dev);
+	int err;
+
+	ret = kstrtouint(buf, 10, &max_queues);
+	if (ret < 0)
+		return ret;
+
+	if (max_queues == info->nr_rings)
+		return count;
+
+	if (max_queues > num_online_cpus()) {
+		pr_err("Invalid max_queues (%u), can't bigger than online cpus: %u.\n",
+			max_queues, num_online_cpus());
+		return -EINVAL;
+	}
+
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "multi-queue-max-queues", "%u", &backend_max_queues);
+	if (err != 1) {
+		pr_err("Backend %s doesn't support block multi queue.\n",
+			info->xbdev->otherend);
+		return -EOPNOTSUPP;
+	}
+	if (max_queues > backend_max_queues) {
+		pr_err("Invalid max_queues (%u), backend supports max: %u.\n",
+			max_queues, backend_max_queues);
+		return -EINVAL;
+	}
+	info->new_max_queues = max_queues;
+
+	return dynamic_reconfig_device(info, count);
+}
+
+static DEVICE_ATTR_RW(max_queues);
+static DEVICE_ATTR_RW(max_ring_page_order);
+static DEVICE_ATTR_RW(max_indirect_segs);
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -2443,6 +2698,22 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}
 
+	err = device_create_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+	if (err)
+		goto fail;
+
+	err = device_create_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
+	if (err) {
+		device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+		goto fail;
+	}
+
+	err = device_create_file(&info->xbdev->dev, &dev_attr_max_queues);
+	if (err) {
+		device_remove_file(&info->xbdev->dev, &dev_attr_max_ring_page_order);
+		device_remove_file(&info->xbdev->dev, &dev_attr_max_indirect_segs);
+		goto fail;
+	}
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	/* Kick pending requests. */
@@ -2453,6 +2724,12 @@ static void blkfront_connect(struct blkfront_info *info)
 	add_disk(info->gd);
 
 	info->is_ready = 1;
+	return;
+
+fail:
+	blkif_free(info, 0);
+	xlvbd_release_gendisk(info);
+	return;
 }
 
 /**
@@ -2500,8 +2777,12 @@ static void blkback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateClosed:
-		if (dev->state == XenbusStateClosed)
+		if (dev->state == XenbusStateClosed) {
+			/* Clear reconfiguring. */
+			if (info->reconfiguring)
+				info->reconfiguring = false;
 			break;
+		}
 		/* Missed the backend's Closing state -- fallthrough */
 	case XenbusStateClosing:
 		if (info)
-- 
2.7.4



* Re: [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
  2016-07-15  9:31 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
  2016-07-15  9:31 ` [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
  2016-07-15  9:31 ` [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
@ 2016-07-21  8:06 ` Roger Pau Monné
       [not found] ` <1468575109-12209-2-git-send-email-bob.liu@oracle.com>
       [not found] ` <1468575109-12209-3-git-send-email-bob.liu@oracle.com>
  4 siblings, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-21  8:06 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Fri, Jul 15, 2016 at 05:31:47PM +0800, Bob Liu wrote:
> Two places didn't get updated when 64KB page granularity was introduced; this
> patch fixes them.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>

Acked-by: Roger Pau Monné <roger.pau@citrix.com>


* Re: [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits()
       [not found] ` <1468575109-12209-2-git-send-email-bob.liu@oracle.com>
@ 2016-07-21  8:29   ` Roger Pau Monné
       [not found]   ` <20160721082913.ahsy5a63ymfoymqv@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-21  8:29 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Fri, Jul 15, 2016 at 05:31:48PM +0800, Bob Liu wrote:
> blk_mq_update_nr_hw_queues() resets all queue limits to their defaults, which
> is not what xen-blkfront expects. Introduce blkif_set_queue_limits() so that
> the limits can be restored to their correct initial values.

Hm, great, and as usual in Linux there isn't even a comment in the function 
that explains what it is supposed to do, or what the side-effects of calling 
blk_mq_update_nr_hw_queues are.
 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>
>  drivers/block/xen-blkfront.c | 91 ++++++++++++++++++++++++--------------------
>  1 file changed, 50 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 032fc94..10f46a8 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -189,6 +189,8 @@ struct blkfront_info
>  	struct mutex mutex;
>  	struct xenbus_device *xbdev;
>  	struct gendisk *gd;
> +	u16 sector_size;
> +	unsigned int physical_sector_size;
>  	int vdevice;
>  	blkif_vdev_t handle;
>  	enum blkif_state connected;
> @@ -913,9 +915,45 @@ static struct blk_mq_ops blkfront_mq_ops = {
>  	.map_queue = blk_mq_map_queue,
>  };
>  
> +static void blkif_set_queue_limits(struct blkfront_info *info)
> +{
> +	struct request_queue *rq = info->rq;
> +	struct gendisk *gd = info->gd;
> +	unsigned int segments = info->max_indirect_segments ? :
> +				BLKIF_MAX_SEGMENTS_PER_REQUEST;
> +
> +	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
> +
> +	if (info->feature_discard) {
> +		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
> +		blk_queue_max_discard_sectors(rq, get_capacity(gd));
> +		rq->limits.discard_granularity = info->discard_granularity;
> +		rq->limits.discard_alignment = info->discard_alignment;
> +		if (info->feature_secdiscard)
> +			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
> +	}

AFAICT, at the point this function is called (in blkfront_resume), the 
value of info->feature_discard is still from the old backend, maybe this 
should be called from blkif_recover after blkfront_gather_backend_features?
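
I.e. something along these lines (a hypothetical sketch of the suggested
ordering, not code from this patch):

    static int blkif_recover(struct blkfront_info *info)
    {
        /* ... */
        blkfront_gather_backend_features(info);
        /* Only now do the limits see the new backend's feature_discard. */
        blkif_set_queue_limits(info);
        /* ... */
    }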

Roger.


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found] ` <1468575109-12209-3-git-send-email-bob.liu@oracle.com>
@ 2016-07-21  8:57   ` Roger Pau Monné
       [not found]   ` <20160721085756.ps4rtdns4xh35yii@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-21  8:57 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
> The current VBD layer reserves buffer space for each attached device based on
> three statically configured settings which are read at boot time.
>  * max_indirect_segs: Maximum number of indirect segments.
>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>  * max_queues: Maximum number of queues (rings) to be used.
> 
> But the storage backend, workload, and guest memory lead to very different
> tuning requirements. It's impossible to predict application characteristics
> centrally, so it's best to allow these settings to be adjusted dynamically
> based on the workload inside the guest.
> 
> Usage:
> Show current values:
> cat /sys/devices/vbd-xxx/max_indirect_segs
> cat /sys/devices/vbd-xxx/max_ring_page_order
> cat /sys/devices/vbd-xxx/max_queues
> 
> Write new values:
> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> echo <new value> > /sys/devices/vbd-xxx/max_queues
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> --
> v2: Add device lock and other comments from Konrad.
> ---
>  drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 283 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 10f46a8..9a5ed22 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -46,6 +46,7 @@
>  #include <linux/scatterlist.h>
>  #include <linux/bitmap.h>
>  #include <linux/list.h>
> +#include <linux/delay.h>
>  
>  #include <xen/xen.h>
>  #include <xen/xenbus.h>
> @@ -212,6 +213,11 @@ struct blkfront_info
>  	/* Save uncomplete reqs and bios for migration. */
>  	struct list_head requests;
>  	struct bio_list bio_list;
> +	/* For dynamic configuration. */
> +	unsigned int reconfiguring:1;
> +	int new_max_indirect_segments;
> +	int new_max_ring_page_order;
> +	int new_max_queues;
>  };
>  
>  static unsigned int nr_minors;
> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>  	for (i = 0; i < info->nr_rings; i++)
>  		blkif_free_ring(&info->rinfo[i]);
>  
> +	/* Remove old xenstore nodes. */
> +	if (info->nr_ring_pages > 1)
> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
> +
> +	if (info->nr_rings == 1) {
> +		if (info->nr_ring_pages == 1) {
> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
> +		} else {
> +			for (i = 0; i < info->nr_ring_pages; i++) {
> +				char ring_ref_name[RINGREF_NAME_LEN];
> +
> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
> +			}
> +		}
> +	} else {
> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
> +
> +		for (i = 0; i < info->nr_rings; i++) {
> +			char queuename[QUEUE_NAME_LEN];
> +
> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
> +		}
> +	}
>  	kfree(info->rinfo);
>  	info->rinfo = NULL;
>  	info->nr_rings = 0;
> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
>  		info->nr_ring_pages = 1;
>  	else {
>  		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
> +		if (info->new_max_ring_page_order) {

Instead of calling this "new_max_ring_page_order", could you just call it 
max_ring_page_order, initialize it to xen_blkif_max_ring_order by default 
and use it everywhere instead of xen_blkif_max_ring_order?

> +			BUG_ON(info->new_max_ring_page_order > max_page_order);
> +			ring_page_order = info->new_max_ring_page_order;
> +		}
>  		info->nr_ring_pages = 1 << ring_page_order;
>  	}
>  
> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
>  		backend_max_queues = 1;
>  
>  	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
> +	if (info->new_max_queues) {

Same here IMHO, this is going to make the code flow slightly easier to 
understand.

> +		BUG_ON(info->new_max_queues > backend_max_queues);
> +		info->nr_rings = info->new_max_queues;
> +	}
>  	/* We need at least one ring. */
>  	if (!info->nr_rings)
>  		info->nr_rings = 1;
> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
>  			    NULL);
>  	if (err)
>  		info->max_indirect_segments = 0;
> -	else
> +	else {
>  		info->max_indirect_segments = min(indirect_segments,
>  						  xen_blkif_max_segments);
> +		if (info->new_max_indirect_segments) {
> +			BUG_ON(info->new_max_indirect_segments > indirect_segments);
> +			info->max_indirect_segments = info->new_max_indirect_segments;
> +		}
> +	}
> +}
> +
> +static ssize_t max_ring_page_order_show(struct device *dev,
> +					struct device_attribute *attr, char *page)
> +{
> +	struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
> +}
> +
> +static ssize_t max_indirect_segs_show(struct device *dev,
> +				      struct device_attribute *attr, char *page)
> +{
> +	struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +	return sprintf(page, "%u\n", info->max_indirect_segments);
> +}
> +
> +static ssize_t max_queues_show(struct device *dev,
> +			       struct device_attribute *attr, char *page)
> +{
> +	struct blkfront_info *info = dev_get_drvdata(dev);
> +
> +	return sprintf(page, "%u\n", info->nr_rings);
> +}
> +
> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> +{
> +	unsigned int i;
> +	int err = -EBUSY;
> +
> +	/*
> +	 * Make sure no migration in parallel, device lock is actually a
> +	 * mutex.
> +	 */
> +	if (!device_trylock(&info->xbdev->dev)) {
> +		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
> +			dev_name(&info->xbdev->dev));
> +		return err;
> +	}
> +
> +	/*
> +	 * Prevent new requests and guarantee no uncompleted reqs.
> +	 */
> +	blk_mq_freeze_queue(info->rq);
> +	if (part_in_flight(&info->gd->part0))
> +		goto out;
> +
> +	/*
> +	 * Front 				Backend
> +	 * Switch to XenbusStateClosed
> +	 *					frontend_changed():
> +	 *					 case XenbusStateClosed:
> +	 *						xen_blkif_disconnect()
> +	 *						Switch to XenbusStateClosed
> +	 * blkfront_resume():
> +	 *					frontend_changed():
> +	 *						reconnect
> +	 * Wait until XenbusStateConnected
> +	 */
> +	info->reconfiguring = true;
> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
> +
> +	/* Poll every 100ms, 1 minute timeout. */
> +	for (i = 0; i < 600; i++) {
> +		/*
> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
> +		 * will clear reconfiguring.
> +		 */
> +		if (!info->reconfiguring)
> +			goto resume;
> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> +	}

Instead of having this wait, could you just set info->reconfiguring = 1, set 
the frontend state to XenbusStateClosed and mimic exactly what a resume from 
suspension does? blkback_changed would have to set the frontend state to 
InitWait when it detects that the backend has switched to Closed, and call 
blkfront_resume.
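
I.e. something like this in blkback_changed() (a hypothetical sketch of the
suggestion, not tested):

    case XenbusStateClosed:
        if (info->reconfiguring) {
            /* Reconnect exactly as a resume from migration would. */
            blkfront_resume(info->xbdev);
            info->reconfiguring = false;
            break;
        }
        /* ... */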

> +	goto out;
> +
> +resume:
> +	if (blkfront_resume(info->xbdev))
> +		goto out;
> +
> +	/* Poll every 100ms, 1 minute timeout. */
> +	for (i = 0; i < 600; i++) {
> +		/* Wait blkfront enter StateConnected which is done by blkif_recover(). */
> +		if (info->xbdev->state == XenbusStateConnected) {
> +			err = count;
> +			goto out;
> +		}
> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> +	}

Same here, IMHO all this should be much more similar to a resume, and you 
shouldn't need all this wait loops.

Roger.


* Re: [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits()
       [not found]   ` <20160721082913.ahsy5a63ymfoymqv@mac>
@ 2016-07-21  9:44     ` Bob Liu
  0 siblings, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-21  9:44 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/21/2016 04:29 PM, Roger Pau Monné wrote:
> On Fri, Jul 15, 2016 at 05:31:48PM +0800, Bob Liu wrote:
>> blk_mq_update_nr_hw_queues() resets all queue limits to their defaults, which
>> is not what xen-blkfront expects. Introduce blkif_set_queue_limits() so that
>> the limits can be restored to their correct initial values.
> 
> Hm, great, and as usual in Linux there isn't even a comment in the function 
> that explains what it is supposed to do, or what the side-effects of calling 
> blk_mq_update_nr_hw_queues are.
>  
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>>
>>  drivers/block/xen-blkfront.c | 91 ++++++++++++++++++++++++--------------------
>>  1 file changed, 50 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 032fc94..10f46a8 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -189,6 +189,8 @@ struct blkfront_info
>>  	struct mutex mutex;
>>  	struct xenbus_device *xbdev;
>>  	struct gendisk *gd;
>> +	u16 sector_size;
>> +	unsigned int physical_sector_size;
>>  	int vdevice;
>>  	blkif_vdev_t handle;
>>  	enum blkif_state connected;
>> @@ -913,9 +915,45 @@ static struct blk_mq_ops blkfront_mq_ops = {
>>  	.map_queue = blk_mq_map_queue,
>>  };
>>  
>> +static void blkif_set_queue_limits(struct blkfront_info *info)
>> +{
>> +	struct request_queue *rq = info->rq;
>> +	struct gendisk *gd = info->gd;
>> +	unsigned int segments = info->max_indirect_segments ? :
>> +				BLKIF_MAX_SEGMENTS_PER_REQUEST;
>> +
>> +	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
>> +
>> +	if (info->feature_discard) {
>> +		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
>> +		blk_queue_max_discard_sectors(rq, get_capacity(gd));
>> +		rq->limits.discard_granularity = info->discard_granularity;
>> +		rq->limits.discard_alignment = info->discard_alignment;
>> +		if (info->feature_secdiscard)
>> +			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
>> +	}
> 
> AFAICT, at the point this function is called (in blkfront_resume), the 
> value of info->feature_discard is still from the old backend, maybe this 
> should be called from blkif_recover after blkfront_gather_backend_features?
> 

Thank you for pointing that out, it will be fixed.

-- 
Regards,
-Bob


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]   ` <20160721085756.ps4rtdns4xh35yii@mac>
@ 2016-07-21 10:08     ` Bob Liu
       [not found]     ` <57909F05.9030809@oracle.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-21 10:08 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
>> The current VBD layer reserves buffer space for each attached device based on
>> three statically configured settings which are read at boot time.
>>  * max_indirect_segs: Maximum number of indirect segments.
>>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
>>  * max_queues: Maximum number of queues (rings) to be used.
>>
>> But the storage backend, workload, and guest memory lead to very different
>> tuning requirements. It's impossible to predict application characteristics
>> centrally, so it's best to allow these settings to be adjusted dynamically
>> based on the workload inside the guest.
>>
>> Usage:
>> Show current values:
>> cat /sys/devices/vbd-xxx/max_indirect_segs
>> cat /sys/devices/vbd-xxx/max_ring_page_order
>> cat /sys/devices/vbd-xxx/max_queues
>>
>> Write new values:
>> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
>> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
>> echo <new value> > /sys/devices/vbd-xxx/max_queues
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> --
>> v2: Add device lock and other comments from Konrad.
>> ---
>>  drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 283 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 10f46a8..9a5ed22 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -46,6 +46,7 @@
>>  #include <linux/scatterlist.h>
>>  #include <linux/bitmap.h>
>>  #include <linux/list.h>
>> +#include <linux/delay.h>
>>  
>>  #include <xen/xen.h>
>>  #include <xen/xenbus.h>
>> @@ -212,6 +213,11 @@ struct blkfront_info
>>  	/* Save uncomplete reqs and bios for migration. */
>>  	struct list_head requests;
>>  	struct bio_list bio_list;
>> +	/* For dynamic configuration. */
>> +	unsigned int reconfiguring:1;
>> +	int new_max_indirect_segments;
>> +	int new_max_ring_page_order;
>> +	int new_max_queues;
>>  };
>>  
>>  static unsigned int nr_minors;
>> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>>  	for (i = 0; i < info->nr_rings; i++)
>>  		blkif_free_ring(&info->rinfo[i]);
>>  
>> +	/* Remove old xenstore nodes. */
>> +	if (info->nr_ring_pages > 1)
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
>> +
>> +	if (info->nr_rings == 1) {
>> +		if (info->nr_ring_pages == 1) {
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
>> +		} else {
>> +			for (i = 0; i < info->nr_ring_pages; i++) {
>> +				char ring_ref_name[RINGREF_NAME_LEN];
>> +
>> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
>> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
>> +			}
>> +		}
>> +	} else {
>> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
>> +
>> +		for (i = 0; i < info->nr_rings; i++) {
>> +			char queuename[QUEUE_NAME_LEN];
>> +
>> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
>> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
>> +		}
>> +	}
>>  	kfree(info->rinfo);
>>  	info->rinfo = NULL;
>>  	info->nr_rings = 0;
>> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
>>  		info->nr_ring_pages = 1;
>>  	else {
>>  		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
>> +		if (info->new_max_ring_page_order) {
> 
> Instead of calling this "new_max_ring_page_order", could you just call it 
> max_ring_page_order, initialize it to xen_blkif_max_ring_order by default 


Sure, I can do that.


> and use it everywhere instead of xen_blkif_max_ring_order?


But "xen_blkif_max_ring_order" still have to be used here, this is the only place "xen_blkif_max_ring_order" is used(except checking the value of it in xlblk_init()).


> 
>> +			BUG_ON(info->new_max_ring_page_order > max_page_order);
>> +			ring_page_order = info->new_max_ring_page_order;
>> +		}
>>  		info->nr_ring_pages = 1 << ring_page_order;
>>  	}
>>  
>> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
>>  		backend_max_queues = 1;
>>  
>>  	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
>> +	if (info->new_max_queues) {
> 
> Same here IMHO, this is going to make the code flow slightly easier to 
> understand.
> 
>> +		BUG_ON(info->new_max_queues > backend_max_queues);
>> +		info->nr_rings = info->new_max_queues;
>> +	}
>>  	/* We need at least one ring. */
>>  	if (!info->nr_rings)
>>  		info->nr_rings = 1;
>> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
>>  			    NULL);
>>  	if (err)
>>  		info->max_indirect_segments = 0;
>> -	else
>> +	else {
>>  		info->max_indirect_segments = min(indirect_segments,
>>  						  xen_blkif_max_segments);
>> +		if (info->new_max_indirect_segments) {
>> +			BUG_ON(info->new_max_indirect_segments > indirect_segments);
>> +			info->max_indirect_segments = info->new_max_indirect_segments;
>> +		}
>> +	}
>> +}
>> +
>> +static ssize_t max_ring_page_order_show(struct device *dev,
>> +					struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
>> +}
>> +
>> +static ssize_t max_indirect_segs_show(struct device *dev,
>> +				      struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", info->max_indirect_segments);
>> +}
>> +
>> +static ssize_t max_queues_show(struct device *dev,
>> +			       struct device_attribute *attr, char *page)
>> +{
>> +	struct blkfront_info *info = dev_get_drvdata(dev);
>> +
>> +	return sprintf(page, "%u\n", info->nr_rings);
>> +}
>> +
>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
>> +{
>> +	unsigned int i;
>> +	int err = -EBUSY;
>> +
>> +	/*
>> +	 * Make sure no migration in parallel, device lock is actually a
>> +	 * mutex.
>> +	 */
>> +	if (!device_trylock(&info->xbdev->dev)) {
>> +		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
>> +			dev_name(&info->xbdev->dev));
>> +		return err;
>> +	}
>> +
>> +	/*
>> +	 * Prevent new requests and guarantee no uncompleted reqs.
>> +	 */
>> +	blk_mq_freeze_queue(info->rq);
>> +	if (part_in_flight(&info->gd->part0))
>> +		goto out;
>> +
>> +	/*
>> +	 * Front 				Backend
>> +	 * Switch to XenbusStateClosed
>> +	 *					frontend_changed():
>> +	 *					 case XenbusStateClosed:
>> +	 *						xen_blkif_disconnect()
>> +	 *						Switch to XenbusStateClosed
>> +	 * blkfront_resume():
>> +	 *					frontend_changed():
>> +	 *						reconnect
>> +	 * Wait until XenbusStateConnected
>> +	 */
>> +	info->reconfiguring = true;
>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
>> +
>> +	/* Poll every 100ms, 1 minute timeout. */
>> +	for (i = 0; i < 600; i++) {
>> +		/*
>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
>> +		 * will clear reconfiguring.
>> +		 */
>> +		if (!info->reconfiguring)
>> +			goto resume;
>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
>> +	}
> 
> Instead of having this wait, could you just set info->reconfiguring = 1, set 
> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> suspension does? blkback_changed would have to set the frontend state to 
> InitWait when it detects that the backend has switched to Closed, and call 
> blkfront_resume.


I think that won't work.
In the real "resume" case, the power management system triggers all the
->resume() paths, but there is no equivalent trigger for dynamic configuration.

Thanks,
Bob Liu


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]     ` <57909F05.9030809@oracle.com>
@ 2016-07-22  7:45       ` Roger Pau Monné
       [not found]       ` <20160722074506.l5nfcmqg3jzsmxzi@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-22  7:45 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
> 
> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> > On Fri, Jul 15, 2016 at 05:31:49PM +0800, Bob Liu wrote:
> >> The current VBD layer reserves buffer space for each attached device based on
> >> three statically configured settings which are read at boot time.
> >>  * max_indirect_segs: Maximum number of indirect segments.
> >>  * max_ring_page_order: Maximum order of pages to be used for the shared ring.
> >>  * max_queues: Maximum number of queues (rings) to be used.
> >>
> >> But the storage backend, workload, and guest memory lead to very different
> >> tuning requirements. It's impossible to predict application characteristics
> >> centrally, so it's best to allow these settings to be adjusted dynamically
> >> based on the workload inside the guest.
> >>
> >> Usage:
> >> Show current values:
> >> cat /sys/devices/vbd-xxx/max_indirect_segs
> >> cat /sys/devices/vbd-xxx/max_ring_page_order
> >> cat /sys/devices/vbd-xxx/max_queues
> >>
> >> Write new values:
> >> echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs
> >> echo <new value> > /sys/devices/vbd-xxx/max_ring_page_order
> >> echo <new value> > /sys/devices/vbd-xxx/max_queues
> >>
> >> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> >> --
> >> v2: Add device lock and other comments from Konrad.
> >> ---
> >>  drivers/block/xen-blkfront.c | 285 ++++++++++++++++++++++++++++++++++++++++++-
> >>  1 file changed, 283 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> >> index 10f46a8..9a5ed22 100644
> >> --- a/drivers/block/xen-blkfront.c
> >> +++ b/drivers/block/xen-blkfront.c
> >> @@ -46,6 +46,7 @@
> >>  #include <linux/scatterlist.h>
> >>  #include <linux/bitmap.h>
> >>  #include <linux/list.h>
> >> +#include <linux/delay.h>
> >>  
> >>  #include <xen/xen.h>
> >>  #include <xen/xenbus.h>
> >> @@ -212,6 +213,11 @@ struct blkfront_info
> >>  	/* Save uncomplete reqs and bios for migration. */
> >>  	struct list_head requests;
> >>  	struct bio_list bio_list;
> >> +	/* For dynamic configuration. */
> >> +	unsigned int reconfiguring:1;
> >> +	int new_max_indirect_segments;
> >> +	int new_max_ring_page_order;
> >> +	int new_max_queues;
> >>  };
> >>  
> >>  static unsigned int nr_minors;
> >> @@ -1350,6 +1356,31 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> >>  	for (i = 0; i < info->nr_rings; i++)
> >>  		blkif_free_ring(&info->rinfo[i]);
> >>  
> >> +	/* Remove old xenstore nodes. */
> >> +	if (info->nr_ring_pages > 1)
> >> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-page-order");
> >> +
> >> +	if (info->nr_rings == 1) {
> >> +		if (info->nr_ring_pages == 1) {
> >> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, "ring-ref");
> >> +		} else {
> >> +			for (i = 0; i < info->nr_ring_pages; i++) {
> >> +				char ring_ref_name[RINGREF_NAME_LEN];
> >> +
> >> +				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
> >> +				xenbus_rm(XBT_NIL, info->xbdev->nodename, ring_ref_name);
> >> +			}
> >> +		}
> >> +	} else {
> >> +		xenbus_rm(XBT_NIL, info->xbdev->nodename, "multi-queue-num-queues");
> >> +
> >> +		for (i = 0; i < info->nr_rings; i++) {
> >> +			char queuename[QUEUE_NAME_LEN];
> >> +
> >> +			snprintf(queuename, QUEUE_NAME_LEN, "queue-%u", i);
> >> +			xenbus_rm(XBT_NIL, info->xbdev->nodename, queuename);
> >> +		}
> >> +	}
> >>  	kfree(info->rinfo);
> >>  	info->rinfo = NULL;
> >>  	info->nr_rings = 0;
> >> @@ -1772,6 +1803,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
> >>  		info->nr_ring_pages = 1;
> >>  	else {
> >>  		ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
> >> +		if (info->new_max_ring_page_order) {
> > 
> > Instead of calling this "new_max_ring_page_order", could you just call it 
> > max_ring_page_order, initialize it to xen_blkif_max_ring_order by default 
> 
> 
> Sure, I can do that.
> 
> 
> > and use it everywhere instead of xen_blkif_max_ring_order?
> 
> 
> But "xen_blkif_max_ring_order" still have to be used here, this is the only place "xen_blkif_max_ring_order" is used(except checking the value of it in xlblk_init()).
> 
> 
> > 
> >> +			BUG_ON(info->new_max_ring_page_order > max_page_order);
> >> +			ring_page_order = info->new_max_ring_page_order;
> >> +		}
> >>  		info->nr_ring_pages = 1 << ring_page_order;
> >>  	}
> >>  
> >> @@ -1895,6 +1930,10 @@ static int negotiate_mq(struct blkfront_info *info)
> >>  		backend_max_queues = 1;
> >>  
> >>  	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
> >> +	if (info->new_max_queues) {
> > 
> > Same here IMHO, this is going to make the code flow slightly easier to 
> > understand.
> > 
> >> +		BUG_ON(info->new_max_queues > backend_max_queues);
> >> +		info->nr_rings = info->new_max_queues;
> >> +	}
> >>  	/* We need at least one ring. */
> >>  	if (!info->nr_rings)
> >>  		info->nr_rings = 1;
> >> @@ -2352,11 +2391,227 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
> >>  			    NULL);
> >>  	if (err)
> >>  		info->max_indirect_segments = 0;
> >> -	else
> >> +	else {
> >>  		info->max_indirect_segments = min(indirect_segments,
> >>  						  xen_blkif_max_segments);
> >> +		if (info->new_max_indirect_segments) {
> >> +			BUG_ON(info->new_max_indirect_segments > indirect_segments);
> >> +			info->max_indirect_segments = info->new_max_indirect_segments;
> >> +		}
> >> +	}
> >> +}
> >> +
> >> +static ssize_t max_ring_page_order_show(struct device *dev,
> >> +					struct device_attribute *attr, char *page)
> >> +{
> >> +	struct blkfront_info *info = dev_get_drvdata(dev);
> >> +
> >> +	return sprintf(page, "%u\n", get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
> >> +}
> >> +
> >> +static ssize_t max_indirect_segs_show(struct device *dev,
> >> +				      struct device_attribute *attr, char *page)
> >> +{
> >> +	struct blkfront_info *info = dev_get_drvdata(dev);
> >> +
> >> +	return sprintf(page, "%u\n", info->max_indirect_segments);
> >> +}
> >> +
> >> +static ssize_t max_queues_show(struct device *dev,
> >> +			       struct device_attribute *attr, char *page)
> >> +{
> >> +	struct blkfront_info *info = dev_get_drvdata(dev);
> >> +
> >> +	return sprintf(page, "%u\n", info->nr_rings);
> >> +}
> >> +
> >> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> >> +{
> >> +	unsigned int i;
> >> +	int err = -EBUSY;
> >> +
> >> +	/*
> >> +	 * Make sure no migration in parallel, device lock is actually a
> >> +	 * mutex.
> >> +	 */
> >> +	if (!device_trylock(&info->xbdev->dev)) {
> >> +		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
> >> +			dev_name(&info->xbdev->dev));
> >> +		return err;
> >> +	}
> >> +
> >> +	/*
> >> +	 * Prevent new requests and guarantee no uncompleted reqs.
> >> +	 */
> >> +	blk_mq_freeze_queue(info->rq);
> >> +	if (part_in_flight(&info->gd->part0))
> >> +		goto out;
> >> +
> >> +	/*
> >> +	 * Front 				Backend
> >> +	 * Switch to XenbusStateClosed
> >> +	 *					frontend_changed():
> >> +	 *					 case XenbusStateClosed:
> >> +	 *						xen_blkif_disconnect()
> >> +	 *						Switch to XenbusStateClosed
> >> +	 * blkfront_resume():
> >> +	 *					frontend_changed():
> >> +	 *						reconnect
> >> +	 * Wait until XenbusStateConnected
> >> +	 */
> >> +	info->reconfiguring = true;
> >> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
> >> +
> >> +	/* Poll every 100ms, 1 minute timeout. */
> >> +	for (i = 0; i < 600; i++) {
> >> +		/*
> >> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
> >> +		 * will clear reconfiguring.
> >> +		 */
> >> +		if (!info->reconfiguring)
> >> +			goto resume;
> >> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> >> +	}
> > 
> > Instead of having this wait, could you just set info->reconfiguring = 1, set 
> > the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> > suspension does? blkback_changed would have to set the frontend state to 
> > InitWait when it detects that the backend has switched to Closed, and call 
> > blkfront_resume.
> 
> 
> I think that won't work.
> In the real "resume" case, the power management system triggers all the
> ->resume() paths, but there is no equivalent trigger for dynamic configuration.

Hello,

I think it should be possible to set info->reconfiguring and wait for the
backend to switch to state Closed; at that point we should call blkfront_resume
(from blkback_changed) and the backend will follow the reconnection.

Roger.


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]       ` <20160722074506.l5nfcmqg3jzsmxzi@mac>
@ 2016-07-22  8:17         ` Bob Liu
       [not found]         ` <5791D6AC.1070604@oracle.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-22  8:17 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
> On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
>>
>> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
..[snip]..
>>>> +
>>>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
>>>> +{
>>>> +	unsigned int i;
>>>> +	int err = -EBUSY;
>>>> +
>>>> +	/*
>>>> +	 * Make sure no migration in parallel, device lock is actually a
>>>> +	 * mutex.
>>>> +	 */
>>>> +	if (!device_trylock(&info->xbdev->dev)) {
>>>> +		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
>>>> +			dev_name(&info->xbdev->dev));
>>>> +		return err;
>>>> +	}
>>>> +
>>>> +	/*
>>>> +	 * Prevent new requests and guarantee no uncompleted reqs.
>>>> +	 */
>>>> +	blk_mq_freeze_queue(info->rq);
>>>> +	if (part_in_flight(&info->gd->part0))
>>>> +		goto out;
>>>> +
>>>> +	/*
>>>> +	 * Front 				Backend
>>>> +	 * Switch to XenbusStateClosed
>>>> +	 *					frontend_changed():
>>>> +	 *					 case XenbusStateClosed:
>>>> +	 *						xen_blkif_disconnect()
>>>> +	 *						Switch to XenbusStateClosed
>>>> +	 * blkfront_resume():
>>>> +	 *					frontend_changed():
>>>> +	 *						reconnect
>>>> +	 * Wait until XenbusStateConnected
>>>> +	 */
>>>> +	info->reconfiguring = true;
>>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
>>>> +
>>>> +	/* Poll every 100ms, 1 minute timeout. */
>>>> +	for (i = 0; i < 600; i++) {
>>>> +		/*
>>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
>>>> +		 * will clear reconfiguring.
>>>> +		 */
>>>> +		if (!info->reconfiguring)
>>>> +			goto resume;
>>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
>>>> +	}
>>>
>>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
>>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
>>> suspension does? blkback_changed would have to set the frontend state to 
>>> InitWait when it detects that the backend has switched to Closed, and call 
>>> blkfront_resume.
>>
>>
>> I think that won't work.
>> In the real "resume" case, the power management system triggers all the
>> ->resume() paths, but there is no equivalent trigger for dynamic configuration.
> 
> Hello,
> 
> I think it should be possible to set info->reconfiguring and wait for the
> backend to switch to state Closed; at that point we should call blkfront_resume
> (from blkback_changed) and the backend will follow the reconnection.
> 

Okay, I get your point. Yes, that's an option.

But this will make 'dynamic configuration' asynchronous, and I'm worried it
will confuse the end-user.
E.g. an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
but then the device will be Closed and disappear, and the user has to wait an
arbitrary amount of time before the device resumes.

-- 
Regards,
-Bob


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]         ` <5791D6AC.1070604@oracle.com>
@ 2016-07-22  9:34           ` Roger Pau Monné
       [not found]           ` <20160722093409.iwcmlubhou4rjjop@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-22  9:34 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Fri, Jul 22, 2016 at 04:17:48PM +0800, Bob Liu wrote:
> 
> On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
> > On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
> >>
> >> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> ..[snip]..
> >>>> +
> >>>> +static ssize_t dynamic_reconfig_device(struct blkfront_info *info, ssize_t count)
> >>>> +{
> >>>> +	unsigned int i;
> >>>> +	int err = -EBUSY;
> >>>> +
> >>>> +	/*
> >>>> +	 * Make sure no migration in parallel, device lock is actually a
> >>>> +	 * mutex.
> >>>> +	 */
> >>>> +	if (!device_trylock(&info->xbdev->dev)) {
> >>>> +		pr_err("Fail to acquire dev:%s lock, may be in migration.\n",
> >>>> +			dev_name(&info->xbdev->dev));
> >>>> +		return err;
> >>>> +	}
> >>>> +
> >>>> +	/*
> >>>> +	 * Prevent new requests and guarantee no uncompleted reqs.
> >>>> +	 */
> >>>> +	blk_mq_freeze_queue(info->rq);
> >>>> +	if (part_in_flight(&info->gd->part0))
> >>>> +		goto out;
> >>>> +
> >>>> +	/*
> >>>> +	 * Front 				Backend
> >>>> +	 * Switch to XenbusStateClosed
> >>>> +	 *					frontend_changed():
> >>>> +	 *					 case XenbusStateClosed:
> >>>> +	 *						xen_blkif_disconnect()
> >>>> +	 *						Switch to XenbusStateClosed
> >>>> +	 * blkfront_resume():
> >>>> +	 *					frontend_changed():
> >>>> +	 *						reconnect
> >>>> +	 * Wait until XenbusStateConnected
> >>>> +	 */
> >>>> +	info->reconfiguring = true;
> >>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
> >>>> +
> >>>> +	/* Poll every 100ms, 1 minute timeout. */
> >>>> +	for (i = 0; i < 600; i++) {
> >>>> +		/*
> >>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
> >>>> +		 * will clear reconfiguring.
> >>>> +		 */
> >>>> +		if (!info->reconfiguring)
> >>>> +			goto resume;
> >>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> >>>> +	}
> >>>
> >>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
> >>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> >>> suspension does? blkback_changed would have to set the frontend state to 
> >>> InitWait when it detects that the backend has switched to Closed, and call 
> >>> blkfront_resume.
> >>
> >>
> >> I think that won't work.
> >> In the real "resume" case, the power management system will trigger all the ->resume() paths.
> >> But there is no such trigger for dynamic configuration.
> > 
> > Hello,
> > 
> > I think it should be possible to set info->reconfiguring and wait for the 
> > backend to switch to state Closed, at that point we should call blkif_resume 
> > (from blkback_changed) and the backend will follow the reconnection.
> > 
> 
> Okay, I get your point. Yes, that's an option.
> 
> But this will make 'dynamic configuration' asynchronous, and I'm worried the end-user will panic.
> E.g.
> an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
> but then the device will be Closed and disappear; the user has to wait an arbitrary time before the device can resume.

That should not happen, AFAICT on migration the device never disappears.
alloc_disk and friends should not be called on resume from migration (see 
the switch in blkfront_connect, you should take the BLKIF_STATE_SUSPENDED 
path for the reconfiguration).
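
For reference, the dispatch in blkfront_connect() looks roughly like this
(abridged sketch from memory, not the exact upstream code):

static void blkfront_connect(struct blkfront_info *info)
{
	switch (info->connected) {
	case BLKIF_STATE_CONNECTED:
		/* Already connected: only revalidate the disk size. */
		return;
	case BLKIF_STATE_SUSPENDED:
		/* Resume/reconfigure: keep the existing gendisk and
		 * recover in-flight requests; no alloc_disk here. */
		blkif_recover(info);
		return;
	default:
		break;
	}
	/* Only a fresh connection reaches the xlvbd_alloc_gendisk() path. */
}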

Roger.


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]           ` <20160722093409.iwcmlubhou4rjjop@mac>
@ 2016-07-22  9:43             ` Bob Liu
       [not found]             ` <5791EAC4.2030309@oracle.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-22  9:43 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/22/2016 05:34 PM, Roger Pau Monné wrote:
> On Fri, Jul 22, 2016 at 04:17:48PM +0800, Bob Liu wrote:
>>
>> On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
>>> On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
>>>>
>>>> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
>> ..[snip]..
>>>>>> +	info->reconfiguring = true;
>>>>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
>>>>>> +
>>>>>> +	/* Poll every 100ms, 1 minute timeout. */
>>>>>> +	for (i = 0; i < 600; i++) {
>>>>>> +		/*
>>>>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
>>>>>> +		 * will clear reconfiguring.
>>>>>> +		 */
>>>>>> +		if (!info->reconfiguring)
>>>>>> +			goto resume;
>>>>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
>>>>>> +	}
>>>>>
>>>>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
>>>>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
>>>>> suspension does? blkback_changed would have to set the frontend state to 
>>>>> InitWait when it detects that the backend has switched to Closed, and call 
>>>>> blkfront_resume.
>>>>
>>>>
>>>> I think that won't work.
>>>> In the real "resume" case, the power management system will trigger all the ->resume() paths.
>>>> But there is no such trigger for dynamic configuration.
>>>
>>> Hello,
>>>
>>> I think it should be possible to set info->reconfiguring and wait for the 
>>> backend to switch to state Closed, at that point we should call blkif_resume 
>>> (from blkback_changed) and the backend will follow the reconnection.
>>>
>>
>> Okay, I get your point. Yes, that's an option.
>>
>> But this will make 'dynamic configuration' asynchronous, and I'm worried the end-user will panic.
>> E.g.
>> an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
>> but then the device will be Closed and disappear; the user has to wait an arbitrary time before the device can resume.
> 
> That should not happen, AFAICT on migration the device never disappears.

Oh, yes.

> alloc_disk and friends should not be called on resume from migration (see 
> the switch in blkfront_connect, you should take the BLKIF_STATE_SUSPENDED 
> path for the reconfiguration).
> 

What if the end-user starts I/O immediately after writing the new value to /sys,
while the resume is still in progress?

-- 
Regards,
-Bob


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]             ` <5791EAC4.2030309@oracle.com>
@ 2016-07-22 11:45               ` Roger Pau Monné
       [not found]               ` <20160722114538.7un36zw7mrvcueob@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-22 11:45 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Fri, Jul 22, 2016 at 05:43:32PM +0800, Bob Liu wrote:
> 
> On 07/22/2016 05:34 PM, Roger Pau Monné wrote:
> > On Fri, Jul 22, 2016 at 04:17:48PM +0800, Bob Liu wrote:
> >>
> >> On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
> >>> On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
> >>>>
> >>>> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> >> ..[snip]..
> >>>>>> +	info->reconfiguring = true;
> >>>>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
> >>>>>> +
> >>>>>> +	/* Poll every 100ms, 1 minute timeout. */
> >>>>>> +	for (i = 0; i < 600; i++) {
> >>>>>> +		/*
> >>>>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
> >>>>>> +		 * will clear reconfiguring.
> >>>>>> +		 */
> >>>>>> +		if (!info->reconfiguring)
> >>>>>> +			goto resume;
> >>>>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> >>>>>> +	}
> >>>>>
> >>>>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
> >>>>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> >>>>> suspension does? blkback_changed would have to set the frontend state to 
> >>>>> InitWait when it detects that the backend has switched to Closed, and call 
> >>>>> blkfront_resume.
> >>>>
> >>>>
> >>>> I think that won't work.
> >>>> In the real "resume" case, the power management system will trigger all the ->resume() paths.
> >>>> But there is no such trigger for dynamic configuration.
> >>>
> >>> Hello,
> >>>
> >>> I think it should be possible to set info->reconfiguring and wait for the 
> >>> backend to switch to state Closed, at that point we should call blkif_resume 
> >>> (from blkback_changed) and the backend will follow the reconnection.
> >>>
> >>
> >> Okay, I get your point. Yes, that's an option.
> >>
> >> But this will make 'dynamic configuration' asynchronous, and I'm worried the end-user will panic.
> >> E.g.
> >> an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
> >> but then the device will be Closed and disappear; the user has to wait an arbitrary time before the device can resume.
> > 
> > That should not happen, AFAICT on migration the device never disappears.
> 
> Oh, yes.
> 
> > alloc_disk and friends should not be called on resume from migration (see 
> > the switch in blkfront_connect, you should take the BLKIF_STATE_SUSPENDED 
> > path for the reconfiguration).
> > 
> 
> What if the end-user starts I/O immediately after writing the new value to /sys,
> while the resume is still in progress?

blkif_free already stops the queues by calling blk_mq_stop_hw_queues, and 
blkif_queue_request will refuse to put anything on the ring if the state 
is different than connected, which in turn makes blkif_queue_rq call 
blk_mq_stop_hw_queue to stop the queue, so it should be safe.
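
Abridged, the queue_rq path behaves roughly like this (sketch from memory,
details may differ from the exact source):

static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *qd)
{
	struct blkfront_ring_info *rinfo = hctx->driver_data;

	blk_mq_start_request(qd->rq);
	if (RING_FULL(&rinfo->ring))
		goto out_busy;
	if (blkif_queue_request(qd->rq, rinfo))	/* refuses if not connected */
		goto out_busy;
	flush_requests(rinfo);
	return BLK_MQ_RQ_QUEUE_OK;

out_busy:
	blk_mq_stop_hw_queue(hctx);	/* callers block until restarted */
	return BLK_MQ_RQ_QUEUE_BUSY;
}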

Roger.


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]               ` <20160722114538.7un36zw7mrvcueob@mac>
@ 2016-07-22 22:18                 ` Bob Liu
       [not found]                 ` <57929BAF.4030304@oracle.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-22 22:18 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/22/2016 07:45 PM, Roger Pau Monné wrote:
> On Fri, Jul 22, 2016 at 05:43:32PM +0800, Bob Liu wrote:
>>
>> On 07/22/2016 05:34 PM, Roger Pau Monné wrote:
>>> On Fri, Jul 22, 2016 at 04:17:48PM +0800, Bob Liu wrote:
>>>>
>>>> On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
>>>>> On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
>>>>>>
>>>>>> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
>>>> ..[snip]..
>>>>>>>> +	info->reconfiguring = true;
>>>>>>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
>>>>>>>> +
>>>>>>>> +	/* Poll every 100ms, 1 minute timeout. */
>>>>>>>> +	for (i = 0; i < 600; i++) {
>>>>>>>> +		/*
>>>>>>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
>>>>>>>> +		 * will clear reconfiguring.
>>>>>>>> +		 */
>>>>>>>> +		if (!info->reconfiguring)
>>>>>>>> +			goto resume;
>>>>>>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
>>>>>>>> +	}
>>>>>>>
>>>>>>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
>>>>>>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
>>>>>>> suspension does? blkback_changed would have to set the frontend state to 
>>>>>>> InitWait when it detects that the backend has switched to Closed, and call 
>>>>>>> blkfront_resume.
>>>>>>
>>>>>>
>>>>>> I think that won't work.
>>>>>> In the real "resume" case, the power management system will trigger all the ->resume() paths.
>>>>>> But there is no such trigger for dynamic configuration.
>>>>>
>>>>> Hello,
>>>>>
>>>>> I think it should be possible to set info->reconfiguring and wait for the 
>>>>> backend to switch to state Closed, at that point we should call blkif_resume 
>>>>> (from blkback_changed) and the backend will follow the reconnection.
>>>>>
>>>>
>>>> Okay, I get your point. Yes, that's an option.
>>>>
>>>> But this will make 'dynamic configuration' asynchronous, and I'm worried the end-user will panic.
>>>> E.g.
>>>> an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
>>>> but then the device will be Closed and disappear; the user has to wait an arbitrary time before the device can resume.
>>>
>>> That should not happen, AFAICT on migration the device never disappears.
>>
>> Oh, yes.
>>
>>> alloc_disk and friends should not be called on resume from migration (see 
>>> the switch in blkfront_connect, you should take the BLKIF_STATE_SUSPENDED 
>>> path for the reconfiguration).
>>>
>>
>> What if the end-user starts I/O immediately after writing the new value to /sys,
>> while the resume is still in progress?
> 
> blkif_free already stops the queues by calling blk_mq_stop_hw_queues, and 
> blkif_queue_request will refuse to put anything on the ring if the state 
> is different than connected, which in turn makes blkif_queue_rq call 
> blk_mq_stop_hw_queue to stop the queue, so it should be safe.
> 

But this will surprise the end-user; our user script looks like this:
1) echo <new value> > /sys/xxxx
2) Start I/O immediately.
	^^^ Fails because requests would be refused (even though the software queue was still frozen).

It's not good for the end-user to have to wait an arbitrary time before restarting I/O.


There are two more concerns I have:
 * blkif_resume() may fail; how can the end-user be aware of that if "echo <new value> > /sys/xxx" already returned success?

 * We take the device lock and blk_mq_freeze_queue() in dynamic_reconfig_device(),
   and then have to release them in blkif_recover() asynchronously, which makes the code harder to read.

As a result, I still prefer the current synchronous way, so that we can know whether blkif_resume() fails,
don't break the end-user, and keep the code more straightforward.
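
To make the trade-off concrete: with the synchronous version, a sysfs
handler can simply propagate the failure to the writer (hypothetical
handler, only for illustration):

static ssize_t max_indirect_segs_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	struct blkfront_info *info = dev_get_drvdata(dev);

	/* ... parse buf and update info->max_indirect_segments ... */

	/* An error from the reconfig is seen directly by write(2)/echo. */
	return dynamic_reconfig_device(info, count);
}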
-- 
Regards,
-Bob


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]                 ` <57929BAF.4030304@oracle.com>
@ 2016-07-25  9:20                   ` Roger Pau Monné
       [not found]                   ` <20160725092002.madb7j6ryn4jcoho@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-25  9:20 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Sat, Jul 23, 2016 at 06:18:23AM +0800, Bob Liu wrote:
> 
> On 07/22/2016 07:45 PM, Roger Pau Monné wrote:
> > On Fri, Jul 22, 2016 at 05:43:32PM +0800, Bob Liu wrote:
> >>
> >> On 07/22/2016 05:34 PM, Roger Pau Monné wrote:
> >>> On Fri, Jul 22, 2016 at 04:17:48PM +0800, Bob Liu wrote:
> >>>>
> >>>> On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
> >>>>> On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
> >>>>>>
> >>>>>> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> >>>> ..[snip]..
> >>>>>>>> +	info->reconfiguring = true;
> >>>>>>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
> >>>>>>>> +
> >>>>>>>> +	/* Poll every 100ms, 1 minute timeout. */
> >>>>>>>> +	for (i = 0; i < 600; i++) {
> >>>>>>>> +		/*
> >>>>>>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
> >>>>>>>> +		 * will clear reconfiguring.
> >>>>>>>> +		 */
> >>>>>>>> +		if (!info->reconfiguring)
> >>>>>>>> +			goto resume;
> >>>>>>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> >>>>>>>> +	}
> >>>>>>>
> >>>>>>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
> >>>>>>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> >>>>>>> suspension does? blkback_changed would have to set the frontend state to 
> >>>>>>> InitWait when it detects that the backend has switched to Closed, and call 
> >>>>>>> blkfront_resume.
> >>>>>>
> >>>>>>
> >>>>>> I think that won't work.
> >>>>>> In the real "resume" case, the power management system will trigger all the ->resume() paths.
> >>>>>> But there is no such trigger for dynamic configuration.
> >>>>>
> >>>>> Hello,
> >>>>>
> >>>>> I think it should be possible to set info->reconfiguring and wait for the 
> >>>>> backend to switch to state Closed, at that point we should call blkif_resume 
> >>>>> (from blkback_changed) and the backend will follow the reconnection.
> >>>>>
> >>>>
> >>>> Okay, I get your point. Yes, that's an option.
> >>>>
> >>>> But this will make 'dynamic configuration' asynchronous, and I'm worried the end-user will panic.
> >>>> E.g.
> >>>> an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
> >>>> but then the device will be Closed and disappear; the user has to wait an arbitrary time before the device can resume.
> >>>
> >>> That should not happen, AFAICT on migration the device never disappears.
> >>
> >> Oh, yes.
> >>
> >>> alloc_disk and friends should not be called on resume from migration (see 
> >>> the switch in blkfront_connect, you should take the BLKIF_STATE_SUSPENDED 
> >>> path for the reconfiguration).
> >>>
> >>
> >> What if the end-user starts I/O immediately after writing the new value to /sys,
> >> while the resume is still in progress?
> > 
> > blkif_free already stops the queues by calling blk_mq_stop_hw_queues, and 
> > blkif_queue_request will refuse to put anything on the ring if the state 
> > is different than connected, which in turn makes blkif_queue_rq call 
> > blk_mq_stop_hw_queue to stop the queue, so it should be safe.
> > 
> 
> But this will surprise the end-user; our user script looks like this:
> 1) echo <new value> > /sys/xxxx
> 2) Start I/O immediately.
> 	^^^ Fails because requests would be refused (even though the software queue was still frozen).

Which error do you get? AFAICT reads/writes should either block when the 
queue is full, or if the fd is in non-blocking mode it should return EAGAIN 
(which a properly coded application should be prepared to deal with 
gracefully if it's using non-blocking fds anyway).
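
E.g., the usual userspace pattern for that (sketch):

	/* Retry a write on a non-blocking fd while the frontend reconnects. */
	for (;;) {
		ssize_t n = write(fd, buf, len);
		if (n >= 0 || (errno != EAGAIN && errno != EWOULDBLOCK))
			break;
		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
		poll(&pfd, 1, -1);	/* wait until writable again */
	}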
 
> It's not good for the end-user to have to wait an arbitrary time before restarting I/O.
> 
> 
> There are two more concerns I have:
>  * blkif_resume() may fail; how can the end-user be aware of that if "echo <new value> > /sys/xxx" already returned success?

If you really think this is needed, can't you use a monitor or some kind of 
condition with a timeout instead of open-coding it? Although I'm still not 
convinced that blocking here is TRTTD.
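
A sketch of that, assuming a struct completion reconfig_done were added to
blkfront_info (initialized at probe) and completed from blkback_changed()
once the backend reaches XenbusStateClosed:

	info->reconfiguring = true;
	xenbus_switch_state(info->xbdev, XenbusStateClosed);
	if (!wait_for_completion_timeout(&info->reconfig_done,
					 msecs_to_jiffies(60 * 1000))) {
		err = -EBUSY;	/* backend never reached Closed */
		goto out;
	}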

>  * We take the device lock and blk_mq_freeze_queue() in dynamic_reconfig_device(),
>    and then have to release them in blkif_recover() asynchronously, which makes the code harder to read.

I'm not sure I'm following here, do you mean that you will pick the lock in 
dynamic_reconfig_device and release it in blkif_recover? Why wouldn't you 
release the lock in dynamic_reconfig_device after doing whatever is needed?

> As a result, I still prefer the current synchronous way, so that we can know whether blkif_resume() fails,
> don't break the end-user, and keep the code more straightforward.
> -- 
> Regards,
> -Bob


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]                   ` <20160725092002.madb7j6ryn4jcoho@mac>
@ 2016-07-25 10:29                     ` Bob Liu
       [not found]                     ` <5795EA02.2090200@oracle.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-25 10:29 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/25/2016 05:20 PM, Roger Pau Monné wrote:
> On Sat, Jul 23, 2016 at 06:18:23AM +0800, Bob Liu wrote:
>>
>> On 07/22/2016 07:45 PM, Roger Pau Monné wrote:
>>> On Fri, Jul 22, 2016 at 05:43:32PM +0800, Bob Liu wrote:
>>>>
>>>> On 07/22/2016 05:34 PM, Roger Pau Monné wrote:
>>>>> On Fri, Jul 22, 2016 at 04:17:48PM +0800, Bob Liu wrote:
>>>>>>
>>>>>> On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
>>>>>>> On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
>>>>>>>>
>>>>>>>> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
>>>>>> ..[snip]..
>>>>>>>>>> +	info->reconfiguring = true;
>>>>>>>>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
>>>>>>>>>> +
>>>>>>>>>> +	/* Poll every 100ms, 1 minute timeout. */
>>>>>>>>>> +	for (i = 0; i < 600; i++) {
>>>>>>>>>> +		/*
>>>>>>>>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
>>>>>>>>>> +		 * will clear reconfiguring.
>>>>>>>>>> +		 */
>>>>>>>>>> +		if (!info->reconfiguring)
>>>>>>>>>> +			goto resume;
>>>>>>>>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
>>>>>>>>>> +	}
>>>>>>>>>
>>>>>>>>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
>>>>>>>>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
>>>>>>>>> suspension does? blkback_changed would have to set the frontend state to 
>>>>>>>>> InitWait when it detects that the backend has switched to Closed, and call 
>>>>>>>>> blkfront_resume.
>>>>>>>>
>>>>>>>>
>>>>>>>> I think that won't work.
>>>>>>>> In the real "resume" case, the power management system will trigger all the ->resume() paths.
>>>>>>>> But there is no such trigger for dynamic configuration.
>>>>>>>
>>>>>>> Hello,
>>>>>>>
>>>>>>> I think it should be possible to set info->reconfiguring and wait for the 
>>>>>>> backend to switch to state Closed, at that point we should call blkif_resume 
>>>>>>> (from blkback_changed) and the backend will follow the reconnection.
>>>>>>>
>>>>>>
>>>>>> Okay, I get your point. Yes, that's an option.
>>>>>>
>>>>>> But this will make 'dynamic configuration' asynchronous, and I'm worried the end-user will panic.
>>>>>> E.g.
>>>>>> an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
>>>>>> but then the device will be Closed and disappear; the user has to wait an arbitrary time before the device can resume.
>>>>>
>>>>> That should not happen, AFAICT on migration the device never disappears.
>>>>
>>>> Oh, yes.
>>>>
>>>>> alloc_disk and friends should not be called on resume from migration (see 
>>>>> the switch in blkfront_connect, you should take the BLKIF_STATE_SUSPENDED 
>>>>> path for the reconfiguration).
>>>>>
>>>>
>>>> What if the end-user starts I/O immediately after writing the new value to /sys,
>>>> while the resume is still in progress?
>>>
>>> blkif_free already stops the queues by calling blk_mq_stop_hw_queues, and 
>>> blkif_queue_request will refuse to put anything on the ring if the state 
>>> is different than connected, which in turn makes blkif_queue_rq call 
>>> blk_mq_stop_hw_queue to stop the queue, so it should be safe.
>>>
>>
>> But this will surprise the end-user; our user script looks like this:
>> 1) echo <new value> > /sys/xxxx
>> 2) Start I/O immediately.
>> 	^^^ Fails because requests would be refused (even though the software queue was still frozen).
> 
> Which error do you get? AFAICT reads/writes should either block when the 

You are right, the reads/writes will just block, which is fine.

> queue is full, or if the fd is in non-blocking mode it should return EAGAIN 
> (which a properly coded application should be prepared to deal with 
> gracefully if it's using non-blocking fds anyway).
>  
>> It's not good for the end-user to have to wait an arbitrary time before restarting I/O.
>>
>>
>> There are two more concerns I have:
>>  * blkif_resume() may fail; how can the end-user be aware of that if "echo <new value> > /sys/xxx" already returned success?
> 
> If you really think this is needed, can't you use a monitor or some kind of 
> condition with a timeout instead of open-coding it? Although I'm still not 
> convinced that blocking here is TRTTD.
> 

Let me ask another way.
If we move blkfront_resume() to blkback_changed(), should we check the return value of blkfront_resume()?
And what should we do if it returns an error?


>>  * We take the device lock and blk_mq_freeze_queue() in dynamic_reconfig_device(),
>>    and then have to release them in blkif_recover() asynchronously, which makes the code harder to read.
> 
> I'm not sure I'm following here, do you mean that you will pick the lock in 
> dynamic_reconfig_device and release it in blkif_recover? Why wouldn't you 

Yes.

> release the lock in dynamic_reconfig_device after doing whatever is needed?
> 

Both 'dynamic configuration' and migration:xenbus_dev_resume() use { blkfront_resume(); blkif_recover() } and depend on changes of xbdev->state.
If they happen simultaneously, the state machine of xbdev->state is going to be a mess and very difficult to track.

The lock (actually a mutex) is like a big lock to make sure no race can happen at all.
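
In other words, the intended pairing is (sketch of the flow in the patch):

	/* dynamic_reconfig_device(): */
	if (!device_trylock(&info->xbdev->dev))
		return -EBUSY;			/* migration in progress */
	blk_mq_freeze_queue(info->rq);
	/* ... switch state, wait for the backend ... */

	/* much later, at the end of blkif_recover(): */
	blk_mq_unfreeze_queue(info->rq);
	device_unlock(&info->xbdev->dev);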

Thanks,
Bob Liu




* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]                     ` <5795EA02.2090200@oracle.com>
@ 2016-07-25 10:53                       ` Roger Pau Monné
       [not found]                       ` <20160725105314.aatqpi4rxaxntt5b@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-25 10:53 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Mon, Jul 25, 2016 at 06:29:22PM +0800, Bob Liu wrote:
> 
> On 07/25/2016 05:20 PM, Roger Pau Monné wrote:
> > On Sat, Jul 23, 2016 at 06:18:23AM +0800, Bob Liu wrote:
> >>
> >> On 07/22/2016 07:45 PM, Roger Pau Monné wrote:
> >>> On Fri, Jul 22, 2016 at 05:43:32PM +0800, Bob Liu wrote:
> >>>>
> >>>> On 07/22/2016 05:34 PM, Roger Pau Monné wrote:
> >>>>> On Fri, Jul 22, 2016 at 04:17:48PM +0800, Bob Liu wrote:
> >>>>>>
> >>>>>> On 07/22/2016 03:45 PM, Roger Pau Monné wrote:
> >>>>>>> On Thu, Jul 21, 2016 at 06:08:05PM +0800, Bob Liu wrote:
> >>>>>>>>
> >>>>>>>> On 07/21/2016 04:57 PM, Roger Pau Monné wrote:
> >>>>>>>>>> +	blk_mq_freeze_queue(info->rq);
> >>>>>>>>>> +	if (part_in_flight(&info->gd->part0))
> >>>>>>>>>> +		goto out;
> >>>>>>>>>> +
> >>>>>>>>>> +	/*
> >>>>>>>>>> +	 * Front 				Backend
> >>>>>>>>>> +	 * Switch to XenbusStateClosed
> >>>>>>>>>> +	 *					frontend_changed():
> >>>>>>>>>> +	 *					 case XenbusStateClosed:
> >>>>>>>>>> +	 *						xen_blkif_disconnect()
> >>>>>>>>>> +	 *						Switch to XenbusStateClosed
> >>>>>>>>>> +	 * blkfront_resume():
> >>>>>>>>>> +	 *					frontend_changed():
> >>>>>>>>>> +	 *						reconnect
> >>>>>>>>>> +	 * Wait until XenbusStateConnected
> >>>>>>>>>> +	 */
> >>>>>>>>>> +	info->reconfiguring = true;
> >>>>>>>>>> +	xenbus_switch_state(info->xbdev, XenbusStateClosed);
> >>>>>>>>>> +
> >>>>>>>>>> +	/* Poll every 100ms, 1 minute timeout. */
> >>>>>>>>>> +	for (i = 0; i < 600; i++) {
> >>>>>>>>>> +		/*
> >>>>>>>>>> +		 * Wait backend enter XenbusStateClosed, blkback_changed()
> >>>>>>>>>> +		 * will clear reconfiguring.
> >>>>>>>>>> +		 */
> >>>>>>>>>> +		if (!info->reconfiguring)
> >>>>>>>>>> +			goto resume;
> >>>>>>>>>> +		schedule_timeout_interruptible(msecs_to_jiffies(100));
> >>>>>>>>>> +	}
> >>>>>>>>>
> >>>>>>>>> Instead of having this wait, could you just set info->reconfiguring = 1, set 
> >>>>>>>>> the frontend state to XenbusStateClosed and mimic exactly what a resume from 
> >>>>>>>>> suspension does? blkback_changed would have to set the frontend state to 
> >>>>>>>>> InitWait when it detects that the backend has switched to Closed, and call 
> >>>>>>>>> blkfront_resume.
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> I think that won't work.
> >>>>>>>> In the real "resume" case, the power management system will trigger all the ->resume() paths.
> >>>>>>>> But there is no such trigger for dynamic configuration.
> >>>>>>>
> >>>>>>> Hello,
> >>>>>>>
> >>>>>>> I think it should be possible to set info->reconfiguring and wait for the 
> >>>>>>> backend to switch to state Closed, at that point we should call blkif_resume 
> >>>>>>> (from blkback_changed) and the backend will follow the reconnection.
> >>>>>>>
> >>>>>>
> >>>>>> Okay, I get your point. Yes, that's an option.
> >>>>>>
> >>>>>> But this will make 'dynamic configuration' asynchronous, and I'm worried the end-user will panic.
> >>>>>> E.g.
> >>>>>> an end-user runs "echo <new value> > /sys/devices/vbd-xxx/max_indirect_segs",
> >>>>>> but then the device will be Closed and disappear; the user has to wait an arbitrary time before the device can resume.
> >>>>>
> >>>>> That should not happen, AFAICT on migration the device never disappears.
> >>>>
> >>>> Oh, yes.
> >>>>
> >>>>> alloc_disk and friends should not be called on resume from migration (see 
> >>>>> the switch in blkfront_connect, you should take the BLKIF_STATE_SUSPENDED 
> >>>>> path for the reconfiguration).
> >>>>>
> >>>>
> >>>> What if the end-user starts I/O immediately after writing the new value to /sys,
> >>>> while the resume is still in progress?
> >>>
> >>> blkif_free already stops the queues by calling blk_mq_stop_hw_queues, and 
> >>> blkif_queue_request will refuse to put anything on the ring if the state 
> >>> is different than connected, which in turn makes blkif_queue_rq call 
> >>> blk_mq_stop_hw_queue to stop the queue, so it should be safe.
> >>>
> >>
> >> But this will surprise the end-user; our user script looks like this:
> >> 1) echo <new value> > /sys/xxxx
> >> 2) Start I/O immediately.
> >> 	^^^ Fails because requests would be refused (even though the software queue was still frozen).
> > 
> > Which error do you get? AFAICT reads/writes should either block when the 
> 
> You are right, the reads/writes will just block, which is fine.
> 
> > queue is full, or if the fd is in non-blocking mode it should return EAGAIN 
> > (which a properly coded application should be prepared to deal with 
> > gracefully if it's using non-blocking fds anyway).
> >  
> >> It's not good for the end-user to have to wait an arbitrary time before restarting I/O.
> >>
> >>
> >> There are two more concerns I have:
> >>  * blkif_resume() may fail; how can the end-user be aware of that if "echo <new value> > /sys/xxx" already returned success?
> > 
> > If you really think this is needed, can't you use a monitor or some kind of 
> > condition with a timeout instead of open-coding it? Although I'm still not 
> > convinced that blocking here is TRTTD.
> > 
> 
> Let me ask another way.
> If we move blkfront_resume() to blkback_changed(), should we check the return value of blkfront_resume()?
> And what should we do if it returns an error?

If it returns an error you should close the device. If you want a more 
advanced solution, you could fall back to the previously working parameters 
and try to reconnect again, but TBH, I doubt that this is going to fix 
whatever issue caused the first error.
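
Something along these lines (sketch; old_segs is a hypothetical saved copy
of the previously working value):

	err = blkfront_resume(info->xbdev);
	if (err) {
		/* Fall back to the last working configuration and retry. */
		info->max_indirect_segments = old_segs;
		err = blkfront_resume(info->xbdev);
		if (err)
			xenbus_dev_fatal(info->xbdev, err,
					 "reconnecting after reconfiguration");
	}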

> >>  * We take the device lock and blk_mq_freeze_queue() in dynamic_reconfig_device(),
> >>    and then have to release them in blkif_recover() asynchronously, which makes the code harder to read.
> > 
> > I'm not sure I'm following here, do you mean that you will pick the lock in 
> > dynamic_reconfig_device and release it in blkif_recover? Why wouldn't you 
> 
> Yes.
> 
> > release the lock in dynamic_reconfig_device after doing whatever is needed?
> > 
> 
> Both 'dynamic configuration' and migration:xenbus_dev_resume() use { blkfront_resume(); blkif_recover() } and depend on changes of xbdev->state.
> If they happen simultaneously, the state machine of xbdev->state is going to be a mess and very difficult to track.
> 
> The lock (actually a mutex) is like a big lock to make sure no race can happen at all.

So the only thing that you should do is set the frontend state to closed and 
wait for the backend to also switch to state closed, and then switch the
frontend state to init again in order to trigger a reconnection.
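
I.e., roughly:

	info->reconfiguring = true;
	xenbus_switch_state(info->xbdev, XenbusStateClosed);
	/* blkback_changed() sees the backend reach XenbusStateClosed ... */
	xenbus_switch_state(info->xbdev, XenbusStateInitialising);
	/* ... and the normal connection handshake runs again. */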

You are right that all this process depends on the state being updated 
correctly, but I don't see how that's different from a normal connection or 
resume, and we don't seem to have races there.

Roger.


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]                       ` <20160725105314.aatqpi4rxaxntt5b@mac>
@ 2016-07-25 11:08                         ` Bob Liu
       [not found]                         ` <5795F334.4040106@oracle.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-25 11:08 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/25/2016 06:53 PM, Roger Pau Monné wrote:
..[snip]..
>>>>  * We take the device lock and blk_mq_freeze_queue() in dynamic_reconfig_device(),
>>>>    and then have to release them in blkif_recover() asynchronously, which makes the code harder to read.
>>>
>>> I'm not sure I'm following here, do you mean that you will pick the lock in 
>>> dynamic_reconfig_device and release it in blkif_recover? Why wouldn't you 
>>
>> Yes.
>>
>>> release the lock in dynamic_reconfig_device after doing whatever is needed?
>>>
>>
>> Both 'dynamic configuration' and migration:xenbus_dev_resume() use { blkfront_resume(); blkif_recover() } and depend on changes of xbdev->state.
>> If they happen simultaneously, the state machine of xbdev->state is going to be a mess and very difficult to track.
>>
>> The lock (actually a mutex) is like a big lock to make sure no race can happen at all.
> 
> So the only thing that you should do is set the frontend state to closed and 
> wait for the backend to also switch to state closed, and then switch the
> frontend state to init again in order to trigger a reconnection.
> 

But if migration:xenbus_dev_resume() is triggered at the same time, any state we set might be changed.
=====
E.g
Dynamic_reconfig_device:                                Migration:xenbus_dev_resume()

Set the frontend state to closed       
                 
							^^^^
							frontend state will be reset to XenbusStateInitialising by xenbus_dev_resume()

Wait for the backend to also switch to state closed
=====
A similar situation may happen anywhere else that sets the state.

> You are right that all this process depends on the state being updated 
> correctly, but I don't see how that's different from a normal connection or 
> resume, and we don't seem to have races there.
> 


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]                         ` <5795F334.4040106@oracle.com>
@ 2016-07-25 12:11                           ` Roger Pau Monné
       [not found]                           ` <20160725121121.6ym4wjjgqyp7akgx@mac>
  1 sibling, 0 replies; 23+ messages in thread
From: Roger Pau Monné @ 2016-07-25 12:11 UTC (permalink / raw)
  To: Bob Liu; +Cc: jgross, xen-devel, linux-kernel

On Mon, Jul 25, 2016 at 07:08:36PM +0800, Bob Liu wrote:
> 
> On 07/25/2016 06:53 PM, Roger Pau Monné wrote:
> ..[snip..]
> >>>>  * We take the device lock and blk_mq_freeze_queue() in dynamic_reconfig_device(),
> >>>>    and then have to release them in blkif_recover() asynchronously, which makes the code harder to read.
> >>>
> >>> I'm not sure I'm following here, do you mean that you will pick the lock in 
> >>> dynamic_reconfig_device and release it in blkif_recover? Why wouldn't you 
> >>
> >> Yes.
> >>
> >>> release the lock in dynamic_reconfig_device after doing whatever is needed?
> >>>
> >>
> >> Both 'dynamic configuration' and migration:xenbus_dev_resume() use { blkfront_resume(); blkif_recover() } and depend on changes of xbdev->state.
> >> If they happen simultaneously, the state machine of xbdev->state is going to be a mess and very difficult to track.
> >>
> >> The lock (actually a mutex) is like a big lock to make sure no race can happen at all.
> > 
> > So the only thing that you should do is set the frontend state to closed and 
> > wait for the backend to also switch to state closed, and then switch the
> > frontend state to init again in order to trigger a reconnection.
> > 
> 
> But if migration:xenbus_dev_resume() is triggered at the same time, any state we set might be changed.
> =====
> E.g
> Dynamic_reconfig_device:                                Migration:xenbus_dev_resume()
> 
> Set the frontend state to closed       
>                  
> 							^^^^
> 							frontend state will be reset to XenbusStateInitialising by xenbus_dev_resume()
> 
> Wait for the backend to also switch to state closed

This is not really a race, the backend will not switch to state closed, and 
instead will appear at state init again, which is what we wanted anyway in 
order to reconnect, so I don't see an issue with this flow.

> =====
> A similar situation may happen anywhere else that sets the state.

Right, but I don't see how your proposed patch also avoids these issues. I 
see that you pick the device lock in dynamic_reconfig_device, but I don't 
see xenbus_dev_resume picking the lock at all, and in any case I don't think 
you should prevent the VM from migrating.

Depending on the toolstack it might decide to kill the VM because it has not 
been able to migrate it, in which case the result is no better.

Roger.


* Re: [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources
       [not found]                           ` <20160725121121.6ym4wjjgqyp7akgx@mac>
@ 2016-07-25 12:25                             ` Bob Liu
  0 siblings, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-25 12:25 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: jgross, xen-devel, linux-kernel


On 07/25/2016 08:11 PM, Roger Pau Monné wrote:
> On Mon, Jul 25, 2016 at 07:08:36PM +0800, Bob Liu wrote:
>>
>> On 07/25/2016 06:53 PM, Roger Pau Monné wrote:
>> ..[snip..]
>>>>>>  * We take the device lock and blk_mq_freeze_queue() in dynamic_reconfig_device(),
>>>>>>    and then have to release them in blkif_recover() asynchronously, which makes the code harder to read.
>>>>>
>>>>> I'm not sure I'm following here, do you mean that you will pick the lock in 
>>>>> dynamic_reconfig_device and release it in blkif_recover? Why wouldn't you 
>>>>
>>>> Yes.
>>>>
>>>>> release the lock in dynamic_reconfig_device after doing whatever is needed?
>>>>>
>>>>
>>>> Both 'dynamic configuration' and migration:xenbus_dev_resume() use { blkfront_resume(); blkif_recover() } and depend on changes of xbdev->state.
>>>> If they happen simultaneously, the state machine of xbdev->state is going to be a mess and very difficult to track.
>>>>
>>>> The lock (actually a mutex) is like a big lock to make sure no race can happen at all.
>>>
>>> So the only thing that you should do is set the frontend state to closed and 
>>> wait for the backend to also switch to state closed, and then switch the
>>> frontend state to init again in order to trigger a reconnection.
>>>
>>
>> But if migration:xenbus_dev_resume() is triggered at the same time, any state we set might be changed.
>> =====
>> E.g
>> Dynamic_reconfig_device:                                Migration:xenbus_dev_resume()
>>
>> Set the frontend state to closed       
>>                  
>> 							^^^^
>> 							frontend state will be reset to XenbusStateInitialising by xenbus_dev_resume()
>>
>> Wait for the backend to also switch to state closed
> 
> This is not really a race, the backend will not switch to state closed, and 
> instead will appear at state init again, which is what we wanted anyway in 
> order to reconnect, so I don't see an issue with this flow.
> 
>> =====
>> A similar situation may happen anywhere else that sets the state.
> 
> Right, but I don't see how your proposed patch also avoids these issues. I 
> see that you pick the device lock in dynamic_reconfig_device, but I don't 
> see xenbus_dev_resume picking the lock at all, and in any case I don't think 

The lock is taken by the power management framework.

Anyway, I'm convinced and will follow your suggestion.
Thank you!

> you should prevent the VM from migrating.
> 
> Depending on the toolstack it might decide to kill the VM because it has not 
> been able to migrate it, in which case the result is not better.
> 


* Re: [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
       [not found] ` <20160728011942.GB3165@char.us.oracle.com>
@ 2016-07-28  9:03   ` Bob Liu
  0 siblings, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-28  9:03 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: xen-devel, linux-kernel, roger.pau


On 07/28/2016 09:19 AM, Konrad Rzeszutek Wilk wrote:
> On Tue, Jul 26, 2016 at 01:19:35PM +0800, Bob Liu wrote:
>> Two places didn't get updated when 64KB page granularity was introduced; this
>> patch fixes them.
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Could you rebase this on xen-tip/for-linus-4.8 pls?

Done, sent the v2 for you to pick up.

> 
>> ---
>>  drivers/block/xen-blkfront.c |    4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index fcc5b4e..032fc94 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -1321,7 +1321,7 @@ free_shadow:
>>  			rinfo->ring_ref[i] = GRANT_INVALID_REF;
>>  		}
>>  	}
>> -	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
>> +	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
>>  	rinfo->ring.sring = NULL;
>>  
>>  	if (rinfo->irq)
>> @@ -2013,7 +2013,7 @@ static int blkif_recover(struct blkfront_info *info)
>>  
>>  	blkfront_gather_backend_features(info);
>>  	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
>> -	blk_queue_max_segments(info->rq, segs);
>> +	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
>>  
>>  	for (r_index = 0; r_index < info->nr_rings; r_index++) {
>>  		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
>> -- 
>> 1.7.10.4
>>


* Re: [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
       [not found] <1469510377-15131-1-git-send-email-bob.liu@oracle.com>
@ 2016-07-28  1:19 ` Konrad Rzeszutek Wilk
       [not found] ` <20160728011942.GB3165@char.us.oracle.com>
  1 sibling, 0 replies; 23+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-07-28  1:19 UTC (permalink / raw)
  To: Bob Liu; +Cc: xen-devel, linux-kernel, roger.pau

On Tue, Jul 26, 2016 at 01:19:35PM +0800, Bob Liu wrote:
> Two places didn't get updated when 64KB page granularity was introduced; this
> patch fixes them.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Could you rebase this on xen-tip/for-linus-4.8 pls?

> ---
>  drivers/block/xen-blkfront.c |    4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index fcc5b4e..032fc94 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -1321,7 +1321,7 @@ free_shadow:
>  			rinfo->ring_ref[i] = GRANT_INVALID_REF;
>  		}
>  	}
> -	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
> +	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
>  	rinfo->ring.sring = NULL;
>  
>  	if (rinfo->irq)
> @@ -2013,7 +2013,7 @@ static int blkif_recover(struct blkfront_info *info)
>  
>  	blkfront_gather_backend_features(info);
>  	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
> -	blk_queue_max_segments(info->rq, segs);
> +	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
>  
>  	for (r_index = 0; r_index < info->nr_rings; r_index++) {
>  		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
> -- 
> 1.7.10.4
> 


* [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity
@ 2016-07-26  5:19 Bob Liu
  0 siblings, 0 replies; 23+ messages in thread
From: Bob Liu @ 2016-07-26  5:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: xen-devel, Bob Liu, roger.pau

Two places didn't get updated when 64KB page granularity was introduced; this
patch fixes them.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
---
 drivers/block/xen-blkfront.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index fcc5b4e..032fc94 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1321,7 +1321,7 @@ free_shadow:
 			rinfo->ring_ref[i] = GRANT_INVALID_REF;
 		}
 	}
-	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
 	rinfo->ring.sring = NULL;
 
 	if (rinfo->irq)
@@ -2013,7 +2013,7 @@ static int blkif_recover(struct blkfront_info *info)
 
 	blkfront_gather_backend_features(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	blk_queue_max_segments(info->rq, segs);
+	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
 
 	for (r_index = 0; r_index < info->nr_rings; r_index++) {
 		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
-- 
1.7.10.4
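
For context, the divisor in the second hunk comes from the 64KB page
granularity work; as a sketch of the relationship (the exact upstream
definitions may differ slightly):

/* Grants are always 4KB, even with 64KB guest pages: */
#define XEN_PAGE_SHIFT	12
#define XEN_PAGE_SIZE	(1UL << XEN_PAGE_SHIFT)
#define GRANTS_PER_PSEG	(PAGE_SIZE / XEN_PAGE_SIZE)
/*
 * A limit expressed in grants ('segs' above) must therefore be divided by
 * GRANTS_PER_PSEG to get the block layer's limit in physical segments,
 * and ring pages are sized in XEN_PAGE_SIZE units, not PAGE_SIZE.
 */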




Thread overview: 23+ messages
2016-07-15  9:31 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
2016-07-15  9:31 ` [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits() Bob Liu
2016-07-15  9:31 ` [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources Bob Liu
2016-07-21  8:06 ` [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Roger Pau Monné
     [not found] ` <1468575109-12209-2-git-send-email-bob.liu@oracle.com>
2016-07-21  8:29   ` [PATCH 2/3] xen-blkfront: introduce blkif_set_queue_limits() Roger Pau Monné
     [not found]   ` <20160721082913.ahsy5a63ymfoymqv@mac>
2016-07-21  9:44     ` Bob Liu
     [not found] ` <1468575109-12209-3-git-send-email-bob.liu@oracle.com>
2016-07-21  8:57   ` [PATCH 3/3] xen-blkfront: dynamic configuration of per-vbd resources Roger Pau Monné
     [not found]   ` <20160721085756.ps4rtdns4xh35yii@mac>
2016-07-21 10:08     ` Bob Liu
     [not found]     ` <57909F05.9030809@oracle.com>
2016-07-22  7:45       ` Roger Pau Monné
     [not found]       ` <20160722074506.l5nfcmqg3jzsmxzi@mac>
2016-07-22  8:17         ` Bob Liu
     [not found]         ` <5791D6AC.1070604@oracle.com>
2016-07-22  9:34           ` Roger Pau Monné
     [not found]           ` <20160722093409.iwcmlubhou4rjjop@mac>
2016-07-22  9:43             ` Bob Liu
     [not found]             ` <5791EAC4.2030309@oracle.com>
2016-07-22 11:45               ` Roger Pau Monné
     [not found]               ` <20160722114538.7un36zw7mrvcueob@mac>
2016-07-22 22:18                 ` Bob Liu
     [not found]                 ` <57929BAF.4030304@oracle.com>
2016-07-25  9:20                   ` Roger Pau Monné
     [not found]                   ` <20160725092002.madb7j6ryn4jcoho@mac>
2016-07-25 10:29                     ` Bob Liu
     [not found]                     ` <5795EA02.2090200@oracle.com>
2016-07-25 10:53                       ` Roger Pau Monné
     [not found]                       ` <20160725105314.aatqpi4rxaxntt5b@mac>
2016-07-25 11:08                         ` Bob Liu
     [not found]                         ` <5795F334.4040106@oracle.com>
2016-07-25 12:11                           ` Roger Pau Monné
     [not found]                           ` <20160725121121.6ym4wjjgqyp7akgx@mac>
2016-07-25 12:25                             ` Bob Liu
2016-07-26  5:19 [PATCH 1/3] xen-blkfront: fix places not updated after introducing 64KB page granularity Bob Liu
     [not found] <1469510377-15131-1-git-send-email-bob.liu@oracle.com>
2016-07-28  1:19 ` Konrad Rzeszutek Wilk
     [not found] ` <20160728011942.GB3165@char.us.oracle.com>
2016-07-28  9:03   ` Bob Liu
