* [PATCH 0/2] block multiqueue support for virtio-blk
@ 2013-10-25 13:04 Christoph Hellwig
  2013-10-25 13:05 ` [PATCH 1/2] blk-mq: add blk_mq_stop_hw_queues Christoph Hellwig
  2013-10-25 13:05 ` [PATCH 2/2] virtio_blk: blk-mq support Christoph Hellwig
  0 siblings, 2 replies; 12+ messages in thread
From: Christoph Hellwig @ 2013-10-25 13:04 UTC
  To: Jens Axboe, Rusty Russell, Asias He, linux-kernel

This is an updated version of the block multiqueue support for virtio-blk,
against Jens' blk-mq/core tree.  It's been working fine in my local tests,
but I'd love to see current performance numbers on hardware fast enough
that people were previously using the bio path for it.


* [PATCH 1/2] blk-mq: add blk_mq_stop_hw_queues
  2013-10-25 13:04 [PATCH 0/2] block multiqueue support for virtio-blk Christoph Hellwig
@ 2013-10-25 13:05 ` Christoph Hellwig
  2013-10-25 13:44   ` Jens Axboe
  2013-10-25 13:05 ` [PATCH 2/2] virtio_blk: blk-mq support Christoph Hellwig
  1 sibling, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2013-10-25 13:05 UTC
  To: Jens Axboe, Rusty Russell, Asias He, linux-kernel

[-- Attachment #1: 0001-blk-mq-add-blk_mq_stop_hw_queues.patch --]
[-- Type: text/plain, Size: 1368 bytes --]

Add a helper to iterate over all hw queues and stop them.  This is useful
for drivers that implement PM suspend functionality.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c         |   12 ++++++++++++
 include/linux/blk-mq.h |    1 +
 2 files changed, 13 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f21ec96..04050a3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -679,6 +679,18 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);
 
+void blk_mq_stop_hw_queues(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		cancel_delayed_work(&hctx->delayed_work);
+		set_bit(BLK_MQ_S_STOPPED, &hctx->state);
+	}
+}
+EXPORT_SYMBOL(blk_mq_stop_hw_queues);
+
 void blk_mq_start_stopped_hw_queues(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 746042ff..3368b97 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -136,6 +136,7 @@ void blk_mq_end_io(struct request *rq, int error);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q);
 
 /*
-- 
1.7.10.4
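
As a usage sketch (hedged: the mydrv_* names are hypothetical, but the
shape mirrors the virtblk_freeze() conversion in patch 2/2), a driver's
PM suspend path would quiesce all hardware queues like this:

	/* Stop all hw queues so ->queue_rq is no longer invoked while
	 * the device is suspended; the resume path restarts them with
	 * blk_mq_start_stopped_hw_queues(). */
	static int mydrv_freeze(struct mydrv *dev)
	{
		blk_mq_stop_hw_queues(dev->disk->queue);
		/* ... device-specific teardown goes here ... */
		return 0;
	}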




* [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-25 13:04 [PATCH 0/2] block multiqueue support for virtio-blk Christoph Hellwig
  2013-10-25 13:05 ` [PATCH 1/2] blk-mq: add blk_mq_stop_hw_queues Christoph Hellwig
@ 2013-10-25 13:05 ` Christoph Hellwig
  2013-10-25 13:51   ` Jens Axboe
                     ` (2 more replies)
  1 sibling, 3 replies; 12+ messages in thread
From: Christoph Hellwig @ 2013-10-25 13:05 UTC
  To: Jens Axboe, Rusty Russell, Asias He, linux-kernel

[-- Attachment #1: 0002-virtio_blk-blk-mq-support.patch --]
[-- Type: text/plain, Size: 13286 bytes --]

Switch virtio-blk from the dual support for old-style requests and bios
over to blk-mq.  For now we pretend to have 4 issue queues, as Jens
pulled that number out of his hair and it worked.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/block/virtio_blk.c |  312 +++++++++-----------------------------------
 1 file changed, 65 insertions(+), 247 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5cdf88b..87f099d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -11,12 +11,11 @@
 #include <linux/string_helpers.h>
 #include <scsi/scsi_cmnd.h>
 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>
 
 #define PART_BITS 4
 
-static bool use_bio;
-module_param(use_bio, bool, S_IRUGO);
-
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
@@ -26,13 +25,11 @@ struct virtio_blk
 {
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
-	wait_queue_head_t queue_wait;
+	spinlock_t vq_lock;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
 
-	mempool_t *pool;
-
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
@@ -47,15 +44,11 @@ struct virtio_blk
 
 	/* Ida index - used to track minor number allocations. */
 	int index;
-
-	/* Scatterlist: can be too big for stack. */
-	struct scatterlist sg[/*sg_elems*/];
 };
 
 struct virtblk_req
 {
 	struct request *req;
-	struct bio *bio;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
 	struct work_struct work;
@@ -84,22 +77,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
 	}
 }
 
-static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
-						    gfp_t gfp_mask)
-{
-	struct virtblk_req *vbr;
-
-	vbr = mempool_alloc(vblk->pool, gfp_mask);
-	if (!vbr)
-		return NULL;
-
-	vbr->vblk = vblk;
-	if (use_bio)
-		sg_init_table(vbr->sg, vblk->sg_elems);
-
-	return vbr;
-}
-
 static int __virtblk_add_req(struct virtqueue *vq,
 			     struct virtblk_req *vbr,
 			     struct scatterlist *data_sg,
@@ -143,83 +120,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	DEFINE_WAIT(wait);
-	int ret;
-
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
-						 have_data)) < 0)) {
-		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
-					  TASK_UNINTERRUPTIBLE);
-
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-		io_schedule();
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-
-		finish_wait(&vblk->queue_wait, &wait);
-	}
-
-	virtqueue_kick(vblk->vq);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-}
-
-static void virtblk_bio_send_flush(struct virtblk_req *vbr)
-{
-	vbr->flags |= VBLK_IS_FLUSH;
-	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
-	vbr->out_hdr.sector = 0;
-	vbr->out_hdr.ioprio = 0;
-
-	virtblk_add_req(vbr, false);
-}
-
-static void virtblk_bio_send_data(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	struct bio *bio = vbr->bio;
-	bool have_data;
-
-	vbr->flags &= ~VBLK_IS_FLUSH;
-	vbr->out_hdr.type = 0;
-	vbr->out_hdr.sector = bio->bi_sector;
-	vbr->out_hdr.ioprio = bio_prio(bio);
-
-	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
-		have_data = true;
-		if (bio->bi_rw & REQ_WRITE)
-			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		else
-			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-	} else
-		have_data = false;
-
-	virtblk_add_req(vbr, have_data);
-}
-
-static void virtblk_bio_send_data_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_data(vbr);
-}
-
-static void virtblk_bio_send_flush_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_flush(vbr);
-}
-
 static inline void virtblk_request_done(struct virtblk_req *vbr)
 {
-	struct virtio_blk *vblk = vbr->vblk;
 	struct request *req = vbr->req;
 	int error = virtblk_result(vbr);
 
@@ -231,90 +133,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
 		req->errors = (error != 0);
 	}
 
-	__blk_end_request_all(req, error);
-	mempool_free(vbr, vblk->pool);
-}
-
-static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (vbr->flags & VBLK_REQ_DATA) {
-		/* Send out the actual write data */
-		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
-		/* Send out a flush before end the bio */
-		vbr->flags &= ~VBLK_REQ_DATA;
-		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_done(struct virtblk_req *vbr)
-{
-	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
-		virtblk_bio_flush_done(vbr);
-	else
-		virtblk_bio_data_done(vbr);
+	blk_mq_end_io(req, error);
 }
 
 static void virtblk_done(struct virtqueue *vq)
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
-	bool bio_done = false, req_done = false;
+	bool req_done = false;
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
 
-	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+	spin_lock_irqsave(&vblk->vq_lock, flags);
 	do {
 		virtqueue_disable_cb(vq);
 		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-			if (vbr->bio) {
-				virtblk_bio_done(vbr);
-				bio_done = true;
-			} else {
-				virtblk_request_done(vbr);
-				req_done = true;
-			}
+			virtblk_request_done(vbr);
+			req_done = true;
 		}
 	} while (!virtqueue_enable_cb(vq));
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
 	/* In case queue is stopped waiting for more buffers. */
 	if (req_done)
-		blk_start_queue(vblk->disk->queue);
-	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
-
-	if (bio_done)
-		wake_up(&vblk->queue_wait);
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
 }
 
-static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
-		   struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 {
+	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct virtblk_req *vbr = req->special;
+	unsigned long flags;
 	unsigned int num;
-	struct virtblk_req *vbr;
+	const bool last = (req->cmd_flags & REQ_END) != 0;
 
-	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
-	if (!vbr)
-		/* When another request finishes we'll try again. */
-		return false;
+	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
 	vbr->req = req;
-	vbr->bio = NULL;
 	if (req->cmd_flags & REQ_FLUSH) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
@@ -342,7 +197,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
+	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(vbr->req) == WRITE)
 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -350,63 +205,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 	}
 
-	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
-		mempool_free(vbr, vblk->pool);
-		return false;
-	}
-
-	return true;
-}
-
-static void virtblk_request(struct request_queue *q)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct request *req;
-	unsigned int issued = 0;
-
-	while ((req = blk_peek_request(q)) != NULL) {
-		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
-		/* If this request fails, stop queue and wait for something to
-		   finish to restart it. */
-		if (!do_req(q, vblk, req)) {
-			blk_stop_queue(q);
-			break;
-		}
-		blk_start_request(req);
-		issued++;
-	}
-
-	if (issued)
+	spin_lock_irqsave(&vblk->vq_lock, flags);
+	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+		spin_unlock_irqrestore(&vblk->vq_lock, flags);
+		blk_mq_stop_hw_queue(hctx);
 		virtqueue_kick(vblk->vq);
-}
-
-static void virtblk_make_request(struct request_queue *q, struct bio *bio)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct virtblk_req *vbr;
-
-	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
-
-	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
-	if (!vbr) {
-		bio_endio(bio, -ENOMEM);
-		return;
+		return BLK_MQ_RQ_QUEUE_BUSY;
 	}
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
 
-	vbr->bio = bio;
-	vbr->flags = 0;
-	if (bio->bi_rw & REQ_FLUSH)
-		vbr->flags |= VBLK_REQ_FLUSH;
-	if (bio->bi_rw & REQ_FUA)
-		vbr->flags |= VBLK_REQ_FUA;
-	if (bio->bi_size)
-		vbr->flags |= VBLK_REQ_DATA;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
-		virtblk_bio_send_flush(vbr);
-	else
-		virtblk_bio_send_data(vbr);
+	if (last)
+		virtqueue_kick(vblk->vq);
+	return BLK_MQ_RQ_QUEUE_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
@@ -680,12 +490,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
+static struct blk_mq_ops virtio_mq_ops = {
+	.queue_rq	= virtio_queue_rq,
+	.map_queue	= blk_mq_map_queue,
+	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
+	.free_hctx	= blk_mq_free_single_hw_queue,
+};
+
+static struct blk_mq_reg virtio_mq_reg = {
+	.ops		= &virtio_mq_ops,
+	.nr_hw_queues	= 4,
+	.queue_depth	= 64,
+	.numa_node	= NUMA_NO_NODE,
+	.flags		= BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
+			     struct request *rq, unsigned int nr)
+{
+	struct virtio_blk *vblk = data;
+	struct virtblk_req *vbr = rq->special;
+
+	sg_init_table(vbr->sg, vblk->sg_elems);
+}
+
 static int virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
 	struct request_queue *q;
 	int err, index;
-	int pool_size;
 
 	u64 cap;
 	u32 v, blk_size, sg_elems, opt_io_size;
@@ -709,17 +542,14 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	/* We need an extra sg elements at head and tail. */
 	sg_elems += 2;
-	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
-				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
+	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
 	if (!vblk) {
 		err = -ENOMEM;
 		goto out_free_index;
 	}
 
-	init_waitqueue_head(&vblk->queue_wait);
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
-	sg_init_table(vblk->sg, vblk->sg_elems);
 	mutex_init(&vblk->config_lock);
 
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -728,31 +558,27 @@ static int virtblk_probe(struct virtio_device *vdev)
 	err = init_vq(vblk);
 	if (err)
 		goto out_free_vblk;
-
-	pool_size = sizeof(struct virtblk_req);
-	if (use_bio)
-		pool_size += sizeof(struct scatterlist) * sg_elems;
-	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
-	if (!vblk->pool) {
-		err = -ENOMEM;
-		goto out_free_vq;
-	}
+	spin_lock_init(&vblk->vq_lock);
 
 	/* FIXME: How many partitions?  How long is a piece of string? */
 	vblk->disk = alloc_disk(1 << PART_BITS);
 	if (!vblk->disk) {
 		err = -ENOMEM;
-		goto out_mempool;
+		goto out_free_vq;
 	}
 
-	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
+	virtio_mq_reg.cmd_size =
+		sizeof(struct virtblk_req) +
+		sizeof(struct scatterlist) * sg_elems;
+
+	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
 	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
-	if (use_bio)
-		blk_queue_make_request(q, virtblk_make_request);
+	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
+
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -857,8 +683,6 @@ out_del_disk:
 	blk_cleanup_queue(vblk->disk->queue);
 out_put_disk:
 	put_disk(vblk->disk);
-out_mempool:
-	mempool_destroy(vblk->pool);
 out_free_vq:
 	vdev->config->del_vqs(vdev);
 out_free_vblk:
@@ -890,7 +714,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 
 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
 	put_disk(vblk->disk);
-	mempool_destroy(vblk->pool);
 	vdev->config->del_vqs(vdev);
 	kfree(vblk);
 
@@ -914,10 +737,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 
 	flush_work(&vblk->config_work);
 
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	blk_stop_queue(vblk->disk->queue);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-	blk_sync_queue(vblk->disk->queue);
+	blk_mq_stop_hw_queues(vblk->disk->queue);
 
 	vdev->config->del_vqs(vdev);
 	return 0;
@@ -930,11 +750,9 @@ static int virtblk_restore(struct virtio_device *vdev)
 
 	vblk->config_enable = true;
 	ret = init_vq(vdev->priv);
-	if (!ret) {
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-		blk_start_queue(vblk->disk->queue);
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-	}
+	if (!ret)
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+
 	return ret;
 }
 #endif
-- 
1.7.10.4




* Re: [PATCH 1/2] blk-mq: add blk_mq_stop_hw_queues
  2013-10-25 13:05 ` [PATCH 1/2] blk-mq: add blk_mq_stop_hw_queues Christoph Hellwig
@ 2013-10-25 13:44   ` Jens Axboe
  0 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2013-10-25 13:44 UTC
  To: Christoph Hellwig; +Cc: Rusty Russell, Asias He, linux-kernel

On Fri, Oct 25 2013, Christoph Hellwig wrote:
> Add a helper to iterate over all hw queues and stop them.  This is useful
> for drivers that implement PM suspend functionality.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/blk-mq.c         |   12 ++++++++++++
>  include/linux/blk-mq.h |    1 +
>  2 files changed, 13 insertions(+)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index f21ec96..04050a3 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -679,6 +679,18 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
>  }
>  EXPORT_SYMBOL(blk_mq_start_hw_queue);
>  
> +void blk_mq_stop_hw_queues(struct request_queue *q)
> +{
> +	struct blk_mq_hw_ctx *hctx;
> +	int i;
> +
> +	queue_for_each_hw_ctx(q, hctx, i) {
> +		cancel_delayed_work(&hctx->delayed_work);
> +		set_bit(BLK_MQ_S_STOPPED, &hctx->state);
> +	}

I changed this to call blk_mq_stop_hw_queue() instead.

-- 
Jens Axboe
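
(A sketch of the helper after that change, assuming the existing
per-hctx blk_mq_stop_hw_queue() performs the cancel_delayed_work() and
set_bit() pair itself:)

	void blk_mq_stop_hw_queues(struct request_queue *q)
	{
		struct blk_mq_hw_ctx *hctx;
		int i;

		/* Delegate to the single-queue stop helper for each
		 * hardware context instead of open-coding the stop. */
		queue_for_each_hw_ctx(q, hctx, i)
			blk_mq_stop_hw_queue(hctx);
	}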



* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-25 13:05 ` [PATCH 2/2] virtio_blk: blk-mq support Christoph Hellwig
@ 2013-10-25 13:51   ` Jens Axboe
  2013-10-25 16:06   ` Asias He
  2013-10-28  2:47   ` Rusty Russell
  2 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2013-10-25 13:51 UTC
  To: Christoph Hellwig; +Cc: Rusty Russell, Asias He, linux-kernel

On Fri, Oct 25 2013, Christoph Hellwig wrote:
> Switch virtio-blk from the dual support for old-style requests and bios
> over to blk-mq.  For now we pretend to have 4 issue queues, as Jens
> pulled that number out of his hair and it worked.

Updated the blk-mq/drivers to carry this version.

-- 
Jens Axboe



* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-25 13:05 ` [PATCH 2/2] virtio_blk: blk-mq support Christoph Hellwig
  2013-10-25 13:51   ` Jens Axboe
@ 2013-10-25 16:06   ` Asias He
  2013-10-28  2:47   ` Rusty Russell
  2 siblings, 0 replies; 12+ messages in thread
From: Asias He @ 2013-10-25 16:06 UTC
  To: Christoph Hellwig; +Cc: Jens Axboe, Rusty Russell, linux-kernel

On Fri, Oct 25, 2013 at 06:05:01AM -0700, Christoph Hellwig wrote:
> Switch virtio-blk from the dual support for old-style requests and bios
> over to blk-mq.  For now we pretend to have 4 issue queues, as Jens
> pulled that number out of his hair and it worked.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/block/virtio_blk.c |  312 +++++++++-----------------------------------
>  1 file changed, 65 insertions(+), 247 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 5cdf88b..87f099d 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -11,12 +11,11 @@
>  #include <linux/string_helpers.h>
>  #include <scsi/scsi_cmnd.h>
>  #include <linux/idr.h>
> +#include <linux/blk-mq.h>
> +#include <linux/numa.h>
>  
>  #define PART_BITS 4
>  
> -static bool use_bio;
> -module_param(use_bio, bool, S_IRUGO);
> -
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
>  
> @@ -26,13 +25,11 @@ struct virtio_blk
>  {
>  	struct virtio_device *vdev;
>  	struct virtqueue *vq;
> -	wait_queue_head_t queue_wait;
> +	spinlock_t vq_lock;
>  
>  	/* The disk structure for the kernel. */
>  	struct gendisk *disk;
>  
> -	mempool_t *pool;
> -
>  	/* Process context for config space updates */
>  	struct work_struct config_work;
>  
> @@ -47,15 +44,11 @@ struct virtio_blk
>  
>  	/* Ida index - used to track minor number allocations. */
>  	int index;
> -
> -	/* Scatterlist: can be too big for stack. */
> -	struct scatterlist sg[/*sg_elems*/];
>  };
>  
>  struct virtblk_req
>  {
>  	struct request *req;
> -	struct bio *bio;
>  	struct virtio_blk_outhdr out_hdr;
>  	struct virtio_scsi_inhdr in_hdr;
>  	struct work_struct work;

We can drop more things:

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 87f099d..c5ca368 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -51,20 +51,10 @@ struct virtblk_req
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
-	struct work_struct work;
-	struct virtio_blk *vblk;
-	int flags;
 	u8 status;
 	struct scatterlist sg[];
 };
 
-enum {
-	VBLK_IS_FLUSH		= 1,
-	VBLK_REQ_FLUSH		= 2,
-	VBLK_REQ_DATA		= 4,
-	VBLK_REQ_FUA		= 8,
-};
-
 static inline int virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {

> @@ -84,22 +77,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
>  	}
>  }
>  
> -static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
> -						    gfp_t gfp_mask)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = mempool_alloc(vblk->pool, gfp_mask);
> -	if (!vbr)
> -		return NULL;
> -
> -	vbr->vblk = vblk;
> -	if (use_bio)
> -		sg_init_table(vbr->sg, vblk->sg_elems);
> -
> -	return vbr;
> -}
> -
>  static int __virtblk_add_req(struct virtqueue *vq,
>  			     struct virtblk_req *vbr,
>  			     struct scatterlist *data_sg,
> @@ -143,83 +120,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> -static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -	DEFINE_WAIT(wait);
> -	int ret;
> -
> -	spin_lock_irq(vblk->disk->queue->queue_lock);
> -	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
> -						 have_data)) < 0)) {
> -		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
> -					  TASK_UNINTERRUPTIBLE);
> -
> -		spin_unlock_irq(vblk->disk->queue->queue_lock);
> -		io_schedule();
> -		spin_lock_irq(vblk->disk->queue->queue_lock);
> -
> -		finish_wait(&vblk->queue_wait, &wait);
> -	}
> -
> -	virtqueue_kick(vblk->vq);
> -	spin_unlock_irq(vblk->disk->queue->queue_lock);
> -}
> -
> -static void virtblk_bio_send_flush(struct virtblk_req *vbr)
> -{
> -	vbr->flags |= VBLK_IS_FLUSH;
> -	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
> -	vbr->out_hdr.sector = 0;
> -	vbr->out_hdr.ioprio = 0;
> -
> -	virtblk_add_req(vbr, false);
> -}
> -
> -static void virtblk_bio_send_data(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -	struct bio *bio = vbr->bio;
> -	bool have_data;
> -
> -	vbr->flags &= ~VBLK_IS_FLUSH;
> -	vbr->out_hdr.type = 0;
> -	vbr->out_hdr.sector = bio->bi_sector;
> -	vbr->out_hdr.ioprio = bio_prio(bio);
> -
> -	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
> -		have_data = true;
> -		if (bio->bi_rw & REQ_WRITE)
> -			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
> -		else
> -			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
> -	} else
> -		have_data = false;
> -
> -	virtblk_add_req(vbr, have_data);
> -}
> -
> -static void virtblk_bio_send_data_work(struct work_struct *work)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = container_of(work, struct virtblk_req, work);
> -
> -	virtblk_bio_send_data(vbr);
> -}
> -
> -static void virtblk_bio_send_flush_work(struct work_struct *work)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = container_of(work, struct virtblk_req, work);
> -
> -	virtblk_bio_send_flush(vbr);
> -}
> -
>  static inline void virtblk_request_done(struct virtblk_req *vbr)
>  {
> -	struct virtio_blk *vblk = vbr->vblk;
>  	struct request *req = vbr->req;
>  	int error = virtblk_result(vbr);
>  
> @@ -231,90 +133,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
>  		req->errors = (error != 0);
>  	}
>  
> -	__blk_end_request_all(req, error);
> -	mempool_free(vbr, vblk->pool);
> -}
> -
> -static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -
> -	if (vbr->flags & VBLK_REQ_DATA) {
> -		/* Send out the actual write data */
> -		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
> -		queue_work(virtblk_wq, &vbr->work);
> -	} else {
> -		bio_endio(vbr->bio, virtblk_result(vbr));
> -		mempool_free(vbr, vblk->pool);
> -	}
> -}
> -
> -static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -
> -	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
> -		/* Send out a flush before end the bio */
> -		vbr->flags &= ~VBLK_REQ_DATA;
> -		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
> -		queue_work(virtblk_wq, &vbr->work);
> -	} else {
> -		bio_endio(vbr->bio, virtblk_result(vbr));
> -		mempool_free(vbr, vblk->pool);
> -	}
> -}
> -
> -static inline void virtblk_bio_done(struct virtblk_req *vbr)
> -{
> -	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
> -		virtblk_bio_flush_done(vbr);
> -	else
> -		virtblk_bio_data_done(vbr);
> +	blk_mq_end_io(req, error);
>  }
>  
>  static void virtblk_done(struct virtqueue *vq)
>  {
>  	struct virtio_blk *vblk = vq->vdev->priv;
> -	bool bio_done = false, req_done = false;
> +	bool req_done = false;
>  	struct virtblk_req *vbr;
>  	unsigned long flags;
>  	unsigned int len;
>  
> -	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
> +	spin_lock_irqsave(&vblk->vq_lock, flags);
>  	do {
>  		virtqueue_disable_cb(vq);
>  		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
> -			if (vbr->bio) {
> -				virtblk_bio_done(vbr);
> -				bio_done = true;
> -			} else {
> -				virtblk_request_done(vbr);
> -				req_done = true;
> -			}
> +			virtblk_request_done(vbr);
> +			req_done = true;
>  		}
>  	} while (!virtqueue_enable_cb(vq));
> +	spin_unlock_irqrestore(&vblk->vq_lock, flags);
> +
>  	/* In case queue is stopped waiting for more buffers. */
>  	if (req_done)
> -		blk_start_queue(vblk->disk->queue);
> -	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
> -
> -	if (bio_done)
> -		wake_up(&vblk->queue_wait);
> +		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
>  }
>  
> -static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
> -		   struct request *req)
> +static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
>  {
> +	struct virtio_blk *vblk = hctx->queue->queuedata;
> +	struct virtblk_req *vbr = req->special;
> +	unsigned long flags;
>  	unsigned int num;
> -	struct virtblk_req *vbr;
> +	const bool last = (req->cmd_flags & REQ_END) != 0;
>  
> -	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
> -	if (!vbr)
> -		/* When another request finishes we'll try again. */
> -		return false;
> +	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
>  
>  	vbr->req = req;
> -	vbr->bio = NULL;
>  	if (req->cmd_flags & REQ_FLUSH) {
>  		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
>  		vbr->out_hdr.sector = 0;
> @@ -342,7 +197,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>  		}
>  	}
>  
> -	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
> +	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(vbr->req) == WRITE)
>  			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
> @@ -350,63 +205,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>  			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
>  	}
>  
> -	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
> -		mempool_free(vbr, vblk->pool);
> -		return false;
> -	}
> -
> -	return true;
> -}
> -
> -static void virtblk_request(struct request_queue *q)
> -{
> -	struct virtio_blk *vblk = q->queuedata;
> -	struct request *req;
> -	unsigned int issued = 0;
> -
> -	while ((req = blk_peek_request(q)) != NULL) {
> -		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> -
> -		/* If this request fails, stop queue and wait for something to
> -		   finish to restart it. */
> -		if (!do_req(q, vblk, req)) {
> -			blk_stop_queue(q);
> -			break;
> -		}
> -		blk_start_request(req);
> -		issued++;
> -	}
> -
> -	if (issued)
> +	spin_lock_irqsave(&vblk->vq_lock, flags);
> +	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
> +		spin_unlock_irqrestore(&vblk->vq_lock, flags);
> +		blk_mq_stop_hw_queue(hctx);
>  		virtqueue_kick(vblk->vq);
> -}
> -
> -static void virtblk_make_request(struct request_queue *q, struct bio *bio)
> -{
> -	struct virtio_blk *vblk = q->queuedata;
> -	struct virtblk_req *vbr;
> -
> -	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
> -
> -	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
> -	if (!vbr) {
> -		bio_endio(bio, -ENOMEM);
> -		return;
> +		return BLK_MQ_RQ_QUEUE_BUSY;
>  	}
> +	spin_unlock_irqrestore(&vblk->vq_lock, flags);
>  
> -	vbr->bio = bio;
> -	vbr->flags = 0;
> -	if (bio->bi_rw & REQ_FLUSH)
> -		vbr->flags |= VBLK_REQ_FLUSH;
> -	if (bio->bi_rw & REQ_FUA)
> -		vbr->flags |= VBLK_REQ_FUA;
> -	if (bio->bi_size)
> -		vbr->flags |= VBLK_REQ_DATA;
> -
> -	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
> -		virtblk_bio_send_flush(vbr);
> -	else
> -		virtblk_bio_send_data(vbr);
> +	if (last)
> +		virtqueue_kick(vblk->vq);
> +	return BLK_MQ_RQ_QUEUE_OK;
>  }
>  
>  /* return id (s/n) string for *disk to *id_str
> @@ -680,12 +490,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
>  	__ATTR(cache_type, S_IRUGO|S_IWUSR,
>  	       virtblk_cache_type_show, virtblk_cache_type_store);
>  
> +static struct blk_mq_ops virtio_mq_ops = {
> +	.queue_rq	= virtio_queue_rq,
> +	.map_queue	= blk_mq_map_queue,
> +	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
> +	.free_hctx	= blk_mq_free_single_hw_queue,
> +};
> +
> +static struct blk_mq_reg virtio_mq_reg = {
> +	.ops		= &virtio_mq_ops,
> +	.nr_hw_queues	= 4,
> +	.queue_depth	= 64,
> +	.numa_node	= NUMA_NO_NODE,
> +	.flags		= BLK_MQ_F_SHOULD_MERGE,
> +};
> +
> +static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
> +			     struct request *rq, unsigned int nr)
> +{
> +	struct virtio_blk *vblk = data;
> +	struct virtblk_req *vbr = rq->special;
> +
> +	sg_init_table(vbr->sg, vblk->sg_elems);
> +}
> +
>  static int virtblk_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_blk *vblk;
>  	struct request_queue *q;
>  	int err, index;
> -	int pool_size;
>  
>  	u64 cap;
>  	u32 v, blk_size, sg_elems, opt_io_size;
> @@ -709,17 +542,14 @@ static int virtblk_probe(struct virtio_device *vdev)
>  
>  	/* We need an extra sg elements at head and tail. */
>  	sg_elems += 2;
> -	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
> -				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
> +	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
>  	if (!vblk) {
>  		err = -ENOMEM;
>  		goto out_free_index;
>  	}
>  
> -	init_waitqueue_head(&vblk->queue_wait);
>  	vblk->vdev = vdev;
>  	vblk->sg_elems = sg_elems;
> -	sg_init_table(vblk->sg, vblk->sg_elems);
>  	mutex_init(&vblk->config_lock);
>  
>  	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
> @@ -728,31 +558,27 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	err = init_vq(vblk);
>  	if (err)
>  		goto out_free_vblk;
> -
> -	pool_size = sizeof(struct virtblk_req);
> -	if (use_bio)
> -		pool_size += sizeof(struct scatterlist) * sg_elems;
> -	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
> -	if (!vblk->pool) {
> -		err = -ENOMEM;
> -		goto out_free_vq;
> -	}
> +	spin_lock_init(&vblk->vq_lock);
>  
>  	/* FIXME: How many partitions?  How long is a piece of string? */
>  	vblk->disk = alloc_disk(1 << PART_BITS);
>  	if (!vblk->disk) {
>  		err = -ENOMEM;
> -		goto out_mempool;
> +		goto out_free_vq;
>  	}
>  
> -	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
> +	virtio_mq_reg.cmd_size =
> +		sizeof(struct virtblk_req) +
> +		sizeof(struct scatterlist) * sg_elems;
> +
> +	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
>  	if (!q) {
>  		err = -ENOMEM;
>  		goto out_put_disk;
>  	}
>  
> -	if (use_bio)
> -		blk_queue_make_request(q, virtblk_make_request);
> +	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
> +
>  	q->queuedata = vblk;
>  
>  	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
> @@ -857,8 +683,6 @@ out_del_disk:
>  	blk_cleanup_queue(vblk->disk->queue);
>  out_put_disk:
>  	put_disk(vblk->disk);
> -out_mempool:
> -	mempool_destroy(vblk->pool);
>  out_free_vq:
>  	vdev->config->del_vqs(vdev);
>  out_free_vblk:
> @@ -890,7 +714,6 @@ static void virtblk_remove(struct virtio_device *vdev)
>  
>  	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
>  	put_disk(vblk->disk);
> -	mempool_destroy(vblk->pool);
>  	vdev->config->del_vqs(vdev);
>  	kfree(vblk);
>  
> @@ -914,10 +737,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
>  
>  	flush_work(&vblk->config_work);
>  
> -	spin_lock_irq(vblk->disk->queue->queue_lock);
> -	blk_stop_queue(vblk->disk->queue);
> -	spin_unlock_irq(vblk->disk->queue->queue_lock);
> -	blk_sync_queue(vblk->disk->queue);
> +	blk_mq_stop_hw_queues(vblk->disk->queue);
>  
>  	vdev->config->del_vqs(vdev);
>  	return 0;
> @@ -930,11 +750,9 @@ static int virtblk_restore(struct virtio_device *vdev)
>  
>  	vblk->config_enable = true;
>  	ret = init_vq(vdev->priv);
> -	if (!ret) {
> -		spin_lock_irq(vblk->disk->queue->queue_lock);
> -		blk_start_queue(vblk->disk->queue);
> -		spin_unlock_irq(vblk->disk->queue->queue_lock);
> -	}
> +	if (!ret)
> +		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
> +
>  	return ret;
>  }
>  #endif
> -- 
> 1.7.10.4
> 
> 

-- 
Asias


* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-25 13:05 ` [PATCH 2/2] virtio_blk: blk-mq support Christoph Hellwig
  2013-10-25 13:51   ` Jens Axboe
  2013-10-25 16:06   ` Asias He
@ 2013-10-28  2:47   ` Rusty Russell
  2013-10-28  8:52     ` Christoph Hellwig
  2 siblings, 1 reply; 12+ messages in thread
From: Rusty Russell @ 2013-10-28  2:47 UTC
  To: Christoph Hellwig, Jens Axboe, Asias He, linux-kernel

Christoph Hellwig <hch@infradead.org> writes:
> Switch virtio-blk from the dual support for old-style requests and bios
> over to blk-mq.  For now we pretend to have 4 issue queues, as Jens
> pulled that number out of his hair and it worked.

Let's pretend I'm stupid.

We don't actually have multiple queues through to the host, but we're
pretending to, because it makes the block layer go faster?

Do I want to know *why* it's faster?  Or should I look the other way?

Thanks,
Rusty.

> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/block/virtio_blk.c |  312 +++++++++-----------------------------------
>  1 file changed, 65 insertions(+), 247 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 5cdf88b..87f099d 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -11,12 +11,11 @@
>  #include <linux/string_helpers.h>
>  #include <scsi/scsi_cmnd.h>
>  #include <linux/idr.h>
> +#include <linux/blk-mq.h>
> +#include <linux/numa.h>
>  
>  #define PART_BITS 4
>  
> -static bool use_bio;
> -module_param(use_bio, bool, S_IRUGO);
> -
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
>  
> @@ -26,13 +25,11 @@ struct virtio_blk
>  {
>  	struct virtio_device *vdev;
>  	struct virtqueue *vq;
> -	wait_queue_head_t queue_wait;
> +	spinlock_t vq_lock;
>  
>  	/* The disk structure for the kernel. */
>  	struct gendisk *disk;
>  
> -	mempool_t *pool;
> -
>  	/* Process context for config space updates */
>  	struct work_struct config_work;
>  
> @@ -47,15 +44,11 @@ struct virtio_blk
>  
>  	/* Ida index - used to track minor number allocations. */
>  	int index;
> -
> -	/* Scatterlist: can be too big for stack. */
> -	struct scatterlist sg[/*sg_elems*/];
>  };
>  
>  struct virtblk_req
>  {
>  	struct request *req;
> -	struct bio *bio;
>  	struct virtio_blk_outhdr out_hdr;
>  	struct virtio_scsi_inhdr in_hdr;
>  	struct work_struct work;
> @@ -84,22 +77,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
>  	}
>  }
>  
> -static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
> -						    gfp_t gfp_mask)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = mempool_alloc(vblk->pool, gfp_mask);
> -	if (!vbr)
> -		return NULL;
> -
> -	vbr->vblk = vblk;
> -	if (use_bio)
> -		sg_init_table(vbr->sg, vblk->sg_elems);
> -
> -	return vbr;
> -}
> -
>  static int __virtblk_add_req(struct virtqueue *vq,
>  			     struct virtblk_req *vbr,
>  			     struct scatterlist *data_sg,
> @@ -143,83 +120,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> -static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -	DEFINE_WAIT(wait);
> -	int ret;
> -
> -	spin_lock_irq(vblk->disk->queue->queue_lock);
> -	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
> -						 have_data)) < 0)) {
> -		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
> -					  TASK_UNINTERRUPTIBLE);
> -
> -		spin_unlock_irq(vblk->disk->queue->queue_lock);
> -		io_schedule();
> -		spin_lock_irq(vblk->disk->queue->queue_lock);
> -
> -		finish_wait(&vblk->queue_wait, &wait);
> -	}
> -
> -	virtqueue_kick(vblk->vq);
> -	spin_unlock_irq(vblk->disk->queue->queue_lock);
> -}
> -
> -static void virtblk_bio_send_flush(struct virtblk_req *vbr)
> -{
> -	vbr->flags |= VBLK_IS_FLUSH;
> -	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
> -	vbr->out_hdr.sector = 0;
> -	vbr->out_hdr.ioprio = 0;
> -
> -	virtblk_add_req(vbr, false);
> -}
> -
> -static void virtblk_bio_send_data(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -	struct bio *bio = vbr->bio;
> -	bool have_data;
> -
> -	vbr->flags &= ~VBLK_IS_FLUSH;
> -	vbr->out_hdr.type = 0;
> -	vbr->out_hdr.sector = bio->bi_sector;
> -	vbr->out_hdr.ioprio = bio_prio(bio);
> -
> -	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
> -		have_data = true;
> -		if (bio->bi_rw & REQ_WRITE)
> -			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
> -		else
> -			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
> -	} else
> -		have_data = false;
> -
> -	virtblk_add_req(vbr, have_data);
> -}
> -
> -static void virtblk_bio_send_data_work(struct work_struct *work)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = container_of(work, struct virtblk_req, work);
> -
> -	virtblk_bio_send_data(vbr);
> -}
> -
> -static void virtblk_bio_send_flush_work(struct work_struct *work)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = container_of(work, struct virtblk_req, work);
> -
> -	virtblk_bio_send_flush(vbr);
> -}
> -
>  static inline void virtblk_request_done(struct virtblk_req *vbr)
>  {
> -	struct virtio_blk *vblk = vbr->vblk;
>  	struct request *req = vbr->req;
>  	int error = virtblk_result(vbr);
>  
> @@ -231,90 +133,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
>  		req->errors = (error != 0);
>  	}
>  
> -	__blk_end_request_all(req, error);
> -	mempool_free(vbr, vblk->pool);
> -}
> -
> -static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -
> -	if (vbr->flags & VBLK_REQ_DATA) {
> -		/* Send out the actual write data */
> -		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
> -		queue_work(virtblk_wq, &vbr->work);
> -	} else {
> -		bio_endio(vbr->bio, virtblk_result(vbr));
> -		mempool_free(vbr, vblk->pool);
> -	}
> -}
> -
> -static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -
> -	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
> -		/* Send out a flush before end the bio */
> -		vbr->flags &= ~VBLK_REQ_DATA;
> -		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
> -		queue_work(virtblk_wq, &vbr->work);
> -	} else {
> -		bio_endio(vbr->bio, virtblk_result(vbr));
> -		mempool_free(vbr, vblk->pool);
> -	}
> -}
> -
> -static inline void virtblk_bio_done(struct virtblk_req *vbr)
> -{
> -	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
> -		virtblk_bio_flush_done(vbr);
> -	else
> -		virtblk_bio_data_done(vbr);
> +	blk_mq_end_io(req, error);
>  }
>  
>  static void virtblk_done(struct virtqueue *vq)
>  {
>  	struct virtio_blk *vblk = vq->vdev->priv;
> -	bool bio_done = false, req_done = false;
> +	bool req_done = false;
>  	struct virtblk_req *vbr;
>  	unsigned long flags;
>  	unsigned int len;
>  
> -	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
> +	spin_lock_irqsave(&vblk->vq_lock, flags);
>  	do {
>  		virtqueue_disable_cb(vq);
>  		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
> -			if (vbr->bio) {
> -				virtblk_bio_done(vbr);
> -				bio_done = true;
> -			} else {
> -				virtblk_request_done(vbr);
> -				req_done = true;
> -			}
> +			virtblk_request_done(vbr);
> +			req_done = true;
>  		}
>  	} while (!virtqueue_enable_cb(vq));
> +	spin_unlock_irqrestore(&vblk->vq_lock, flags);
> +
>  	/* In case queue is stopped waiting for more buffers. */
>  	if (req_done)
> -		blk_start_queue(vblk->disk->queue);
> -	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
> -
> -	if (bio_done)
> -		wake_up(&vblk->queue_wait);
> +		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
>  }
>  
> -static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
> -		   struct request *req)
> +static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
>  {
> +	struct virtio_blk *vblk = hctx->queue->queuedata;
> +	struct virtblk_req *vbr = req->special;
> +	unsigned long flags;
>  	unsigned int num;
> -	struct virtblk_req *vbr;
> +	const bool last = (req->cmd_flags & REQ_END) != 0;
>  
> -	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
> -	if (!vbr)
> -		/* When another request finishes we'll try again. */
> -		return false;
> +	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
>  
>  	vbr->req = req;
> -	vbr->bio = NULL;
>  	if (req->cmd_flags & REQ_FLUSH) {
>  		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
>  		vbr->out_hdr.sector = 0;
> @@ -342,7 +197,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>  		}
>  	}
>  
> -	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
> +	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(vbr->req) == WRITE)
>  			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
> @@ -350,63 +205,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>  			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
>  	}
>  
> -	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
> -		mempool_free(vbr, vblk->pool);
> -		return false;
> -	}
> -
> -	return true;
> -}
> -
> -static void virtblk_request(struct request_queue *q)
> -{
> -	struct virtio_blk *vblk = q->queuedata;
> -	struct request *req;
> -	unsigned int issued = 0;
> -
> -	while ((req = blk_peek_request(q)) != NULL) {
> -		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> -
> -		/* If this request fails, stop queue and wait for something to
> -		   finish to restart it. */
> -		if (!do_req(q, vblk, req)) {
> -			blk_stop_queue(q);
> -			break;
> -		}
> -		blk_start_request(req);
> -		issued++;
> -	}
> -
> -	if (issued)
> +	spin_lock_irqsave(&vblk->vq_lock, flags);
> +	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
> +		spin_unlock_irqrestore(&vblk->vq_lock, flags);
> +		blk_mq_stop_hw_queue(hctx);
>  		virtqueue_kick(vblk->vq);
> -}
> -
> -static void virtblk_make_request(struct request_queue *q, struct bio *bio)
> -{
> -	struct virtio_blk *vblk = q->queuedata;
> -	struct virtblk_req *vbr;
> -
> -	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
> -
> -	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
> -	if (!vbr) {
> -		bio_endio(bio, -ENOMEM);
> -		return;
> +		return BLK_MQ_RQ_QUEUE_BUSY;
>  	}
> +	spin_unlock_irqrestore(&vblk->vq_lock, flags);
>  
> -	vbr->bio = bio;
> -	vbr->flags = 0;
> -	if (bio->bi_rw & REQ_FLUSH)
> -		vbr->flags |= VBLK_REQ_FLUSH;
> -	if (bio->bi_rw & REQ_FUA)
> -		vbr->flags |= VBLK_REQ_FUA;
> -	if (bio->bi_size)
> -		vbr->flags |= VBLK_REQ_DATA;
> -
> -	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
> -		virtblk_bio_send_flush(vbr);
> -	else
> -		virtblk_bio_send_data(vbr);
> +	if (last)
> +		virtqueue_kick(vblk->vq);
> +	return BLK_MQ_RQ_QUEUE_OK;
>  }
>  
>  /* return id (s/n) string for *disk to *id_str
> @@ -680,12 +490,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
>  	__ATTR(cache_type, S_IRUGO|S_IWUSR,
>  	       virtblk_cache_type_show, virtblk_cache_type_store);
>  
> +static struct blk_mq_ops virtio_mq_ops = {
> +	.queue_rq	= virtio_queue_rq,
> +	.map_queue	= blk_mq_map_queue,
> +	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
> +	.free_hctx	= blk_mq_free_single_hw_queue,
> +};
> +
> +static struct blk_mq_reg virtio_mq_reg = {
> +	.ops		= &virtio_mq_ops,
> +	.nr_hw_queues	= 4,
> +	.queue_depth	= 64,
> +	.numa_node	= NUMA_NO_NODE,
> +	.flags		= BLK_MQ_F_SHOULD_MERGE,
> +};
> +
> +static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
> +			     struct request *rq, unsigned int nr)
> +{
> +	struct virtio_blk *vblk = data;
> +	struct virtblk_req *vbr = rq->special;
> +
> +	sg_init_table(vbr->sg, vblk->sg_elems);
> +}
> +
>  static int virtblk_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_blk *vblk;
>  	struct request_queue *q;
>  	int err, index;
> -	int pool_size;
>  
>  	u64 cap;
>  	u32 v, blk_size, sg_elems, opt_io_size;
> @@ -709,17 +542,14 @@ static int virtblk_probe(struct virtio_device *vdev)
>  
>  	/* We need an extra sg elements at head and tail. */
>  	sg_elems += 2;
> -	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
> -				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
> +	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
>  	if (!vblk) {
>  		err = -ENOMEM;
>  		goto out_free_index;
>  	}
>  
> -	init_waitqueue_head(&vblk->queue_wait);
>  	vblk->vdev = vdev;
>  	vblk->sg_elems = sg_elems;
> -	sg_init_table(vblk->sg, vblk->sg_elems);
>  	mutex_init(&vblk->config_lock);
>  
>  	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
> @@ -728,31 +558,27 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	err = init_vq(vblk);
>  	if (err)
>  		goto out_free_vblk;
> -
> -	pool_size = sizeof(struct virtblk_req);
> -	if (use_bio)
> -		pool_size += sizeof(struct scatterlist) * sg_elems;
> -	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
> -	if (!vblk->pool) {
> -		err = -ENOMEM;
> -		goto out_free_vq;
> -	}
> +	spin_lock_init(&vblk->vq_lock);
>  
>  	/* FIXME: How many partitions?  How long is a piece of string? */
>  	vblk->disk = alloc_disk(1 << PART_BITS);
>  	if (!vblk->disk) {
>  		err = -ENOMEM;
> -		goto out_mempool;
> +		goto out_free_vq;
>  	}
>  
> -	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
> +	virtio_mq_reg.cmd_size =
> +		sizeof(struct virtblk_req) +
> +		sizeof(struct scatterlist) * sg_elems;
> +
> +	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
>  	if (!q) {
>  		err = -ENOMEM;
>  		goto out_put_disk;
>  	}
>  
> -	if (use_bio)
> -		blk_queue_make_request(q, virtblk_make_request);
> +	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
> +
>  	q->queuedata = vblk;
>  
>  	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
> @@ -857,8 +683,6 @@ out_del_disk:
>  	blk_cleanup_queue(vblk->disk->queue);
>  out_put_disk:
>  	put_disk(vblk->disk);
> -out_mempool:
> -	mempool_destroy(vblk->pool);
>  out_free_vq:
>  	vdev->config->del_vqs(vdev);
>  out_free_vblk:
> @@ -890,7 +714,6 @@ static void virtblk_remove(struct virtio_device *vdev)
>  
>  	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
>  	put_disk(vblk->disk);
> -	mempool_destroy(vblk->pool);
>  	vdev->config->del_vqs(vdev);
>  	kfree(vblk);
>  
> @@ -914,10 +737,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
>  
>  	flush_work(&vblk->config_work);
>  
> -	spin_lock_irq(vblk->disk->queue->queue_lock);
> -	blk_stop_queue(vblk->disk->queue);
> -	spin_unlock_irq(vblk->disk->queue->queue_lock);
> -	blk_sync_queue(vblk->disk->queue);
> +	blk_mq_stop_hw_queues(vblk->disk->queue);
>  
>  	vdev->config->del_vqs(vdev);
>  	return 0;
> @@ -930,11 +750,9 @@ static int virtblk_restore(struct virtio_device *vdev)
>  
>  	vblk->config_enable = true;
>  	ret = init_vq(vdev->priv);
> -	if (!ret) {
> -		spin_lock_irq(vblk->disk->queue->queue_lock);
> -		blk_start_queue(vblk->disk->queue);
> -		spin_unlock_irq(vblk->disk->queue->queue_lock);
> -	}
> +	if (!ret)
> +		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
> +
>  	return ret;
>  }
>  #endif
> -- 
> 1.7.10.4


* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-28  2:47   ` Rusty Russell
@ 2013-10-28  8:52     ` Christoph Hellwig
  2013-10-29 21:34       ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2013-10-28  8:52 UTC
  To: Rusty Russell; +Cc: Jens Axboe, Asias He, linux-kernel

On Mon, Oct 28, 2013 at 01:17:54PM +1030, Rusty Russell wrote:
> Let's pretend I'm stupid.
> 
> We don't actually have multiple queues through to the host, but we're
> pretending to, because it makes the block layer go faster?
> 
> Do I want to know *why* it's faster?  Or should I look the other way?

You shouldn't.  As to how multiple queues benefit here I'd like to defer
to Jens; given the single workqueue I don't really know where to look here.

The real benefit that unfortunately wasn't obvious from the description
is that even with just a single queue the blk-multiqueue infrastructure
will be a lot faster, because it is designed in a much more streamlined
fashion and avoids lots of lock roundtrips, both during submission itself
and between submission and completion.  Back when I tried to get virtio-blk
to perform well on high-end flash (the work that Asias took over later)
the queue_lock contention was the major issue in virtio-blk and this
patch gets rid of that even with a single queue.

A good example is the set of patches from Nick to move scsi drivers that
only support a single queue over to the infrastructure.  Even that gave
over a 10-fold improvement over the old code.

Unfortunately I do not have access to this kind of hardware at the
moment, but I'd love to see if Asias or anyone at Red Hat could redo
those old numbers.


* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-28  8:52     ` Christoph Hellwig
@ 2013-10-29 21:34       ` Jens Axboe
  2013-10-30  8:51         ` Asias He
  0 siblings, 1 reply; 12+ messages in thread
From: Jens Axboe @ 2013-10-29 21:34 UTC
  To: Christoph Hellwig, Rusty Russell; +Cc: Asias He, linux-kernel

On 10/28/2013 02:52 AM, Christoph Hellwig wrote:
> On Mon, Oct 28, 2013 at 01:17:54PM +1030, Rusty Russell wrote:
>> Let's pretend I'm stupid.
>>
>> We don't actually have multiple queues through to the host, but we're
>> pretending to, because it makes the block layer go faster?
>>
>> Do I want to know *why* it's faster?  Or should I look the other way?
> 
> You shouldn't.  As to how multiple queues benefit here I'd like to defer
> to Jens; given the single workqueue I don't really know where to look here.

The 4 was chosen to "have some number of multiple queues" and to be able
to exercise that part; no real performance testing was done by me after
the implementation to verify whether it was faster at 1, 2, 4, or
others. But it was useful for that! For merging, we can easily just make
it 1 since that's the most logical transformation. I can set some time
aside to play with multiple queues and see if we gain anything, but that
can be done post merge.

> The real benefit that unfortunately wasn't obvious from the description
> is that even with just a single queue the blk-multiqueue infrastructure
> will be a lot faster, because it is designed in a much more streamlined
> fashion and avoids lots of lock roundtrips, both during submission itself
> and between submission and completion.  Back when I tried to get virtio-blk
> to perform well on high-end flash (the work that Asias took over later)
> the queue_lock contention was the major issue in virtio-blk and this
> patch gets rid of that even with a single queue.
> 
> A good example is the set of patches from Nick to move scsi drivers that
> only support a single queue over to the infrastructure.  Even that gave
> over a 10-fold improvement over the old code.
> 
> Unfortunately I do not have access to this kind of hardware at the
> moment, but I'd love to see if Asias or anyone at Red Hat could redo
> those old numbers.

I've got a variety of fast devices, so I should be able to run that.
Asias, let me know what your position is; it'd be great to have
independent results.

-- 
Jens Axboe



* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-29 21:34       ` Jens Axboe
@ 2013-10-30  8:51         ` Asias He
  2013-11-01 16:45           ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Asias He @ 2013-10-30  8:51 UTC
  To: Jens Axboe; +Cc: Christoph Hellwig, Rusty Russell, linux-kernel

Hello Jens,

On Tue, Oct 29, 2013 at 03:34:01PM -0600, Jens Axboe wrote:
> On 10/28/2013 02:52 AM, Christoph Hellwig wrote:
> > On Mon, Oct 28, 2013 at 01:17:54PM +1030, Rusty Russell wrote:
> >> Let's pretend I'm stupid.
> >>
> >> We don't actually have multiple queues through to the host, but we're
> >> pretending to, because it makes the block layer go faster?
> >>
> >> Do I want to know *why* it's faster?  Or should I look the other way?
> > 
> > You shouldn't.  As to how multiple queues benefit here I'd like to defer
> > to Jens; given the single workqueue I don't really know where to look here.
> 
> The 4 was chosen to "have some number of multiple queues" and to be able
> to exercise that part; no real performance testing was done by me after
> the implementation to verify whether it was faster at 1, 2, 4, or
> others. But it was useful for that! For merging, we can easily just make
> it 1 since that's the most logical transformation. I can set some time
> aside to play with multiple queues and see if we gain anything, but that
> can be done post merge.
> 
> > The real benefit that unfortunately wasn't obvious from the description
> > is that even with just a single queue the blk-multiqueue infrastructure
> > will be a lot faster, because it is designed in a much more streamlined
> > fashion and avoids lots of lock roundtrips, both during submission itself
> > and between submission and completion.  Back when I tried to get virtio-blk
> > to perform well on high-end flash (the work that Asias took over later)
> > the queue_lock contention was the major issue in virtio-blk and this
> > patch gets rid of that even with a single queue.
> > 
> > A good example is the set of patches from Nick to move scsi drivers that
> > only support a single queue over to the infrastructure.  Even that gave
> > over a 10-fold improvement over the old code.
> > 
> > Unfortunately I do not have access to this kind of hardware at the
> > moment, but I'd love to see if Asias or anyone at Red Hat could redo
> > those old numbers.
> 
> I've got a variety of fast devices, so I should be able to run that.
> Asias, let me know what your position is; it'd be great to have
> independent results.

I'd love to run the test but I do not have fast devices around.  It would
be nice if Nick can give it a try ;-)

1) Set .nr_hw_queues to 1 instead of 4 for now (see the sketch below).
2) Drop some more unused code in the other mail I sent.
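
A sketch of the registration with suggestion 1) applied (identical to
the struct in the patch, with only .nr_hw_queues changed):

	static struct blk_mq_reg virtio_mq_reg = {
		.ops		= &virtio_mq_ops,
		.nr_hw_queues	= 1,	/* single virtqueue to the host for now */
		.queue_depth	= 64,
		.numa_node	= NUMA_NO_NODE,
		.flags		= BLK_MQ_F_SHOULD_MERGE,
	};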

Otherwise, feel free to add:

Acked-by: Asias He <asias@redhat.com>

> -- 
> Jens Axboe
> 

-- 
Asias


* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-10-30  8:51         ` Asias He
@ 2013-11-01 16:45           ` Christoph Hellwig
  2013-11-01 17:00             ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2013-11-01 16:45 UTC
  To: Asias He; +Cc: Jens Axboe, Christoph Hellwig, Rusty Russell, linux-kernel

Updated version to address Asias' comments, rebased on top of
block/for-next with the immutable bio changes:

---

From: Jens Axboe <axboe@kernel.dk>
Subject: virtio_blk: blk-mq support
    
Switch virtio-blk from the dual support for old-style requests and bios
to use the block-multiqueue.
    
Acked-by: Asias He <asias@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christoph Hellwig <hch@lst.de>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 93fde08..7455fe2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -11,12 +11,11 @@
 #include <linux/string_helpers.h>
 #include <scsi/scsi_cmnd.h>
 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>
 
 #define PART_BITS 4
 
-static bool use_bio;
-module_param(use_bio, bool, S_IRUGO);
-
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
@@ -26,13 +25,11 @@ struct virtio_blk
 {
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
-	wait_queue_head_t queue_wait;
+	spinlock_t vq_lock;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
 
-	mempool_t *pool;
-
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
@@ -47,31 +44,17 @@ struct virtio_blk
 
 	/* Ida index - used to track minor number allocations. */
 	int index;
-
-	/* Scatterlist: can be too big for stack. */
-	struct scatterlist sg[/*sg_elems*/];
 };
 
 struct virtblk_req
 {
 	struct request *req;
-	struct bio *bio;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
-	struct work_struct work;
-	struct virtio_blk *vblk;
-	int flags;
 	u8 status;
 	struct scatterlist sg[];
 };
 
-enum {
-	VBLK_IS_FLUSH		= 1,
-	VBLK_REQ_FLUSH		= 2,
-	VBLK_REQ_DATA		= 4,
-	VBLK_REQ_FUA		= 8,
-};
-
 static inline int virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {
@@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
 	}
 }
 
-static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
-						    gfp_t gfp_mask)
-{
-	struct virtblk_req *vbr;
-
-	vbr = mempool_alloc(vblk->pool, gfp_mask);
-	if (!vbr)
-		return NULL;
-
-	vbr->vblk = vblk;
-	if (use_bio)
-		sg_init_table(vbr->sg, vblk->sg_elems);
-
-	return vbr;
-}
-
 static int __virtblk_add_req(struct virtqueue *vq,
 			     struct virtblk_req *vbr,
 			     struct scatterlist *data_sg,
@@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	DEFINE_WAIT(wait);
-	int ret;
-
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
-						 have_data)) < 0)) {
-		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
-					  TASK_UNINTERRUPTIBLE);
-
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-		io_schedule();
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-
-		finish_wait(&vblk->queue_wait, &wait);
-	}
-
-	virtqueue_kick(vblk->vq);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-}
-
-static void virtblk_bio_send_flush(struct virtblk_req *vbr)
-{
-	vbr->flags |= VBLK_IS_FLUSH;
-	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
-	vbr->out_hdr.sector = 0;
-	vbr->out_hdr.ioprio = 0;
-
-	virtblk_add_req(vbr, false);
-}
-
-static void virtblk_bio_send_data(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	struct bio *bio = vbr->bio;
-	bool have_data;
-
-	vbr->flags &= ~VBLK_IS_FLUSH;
-	vbr->out_hdr.type = 0;
-	vbr->out_hdr.sector = bio->bi_iter.bi_sector;
-	vbr->out_hdr.ioprio = bio_prio(bio);
-
-	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
-		have_data = true;
-		if (bio->bi_rw & REQ_WRITE)
-			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		else
-			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-	} else
-		have_data = false;
-
-	virtblk_add_req(vbr, have_data);
-}
-
-static void virtblk_bio_send_data_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_data(vbr);
-}
-
-static void virtblk_bio_send_flush_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_flush(vbr);
-}
-
 static inline void virtblk_request_done(struct virtblk_req *vbr)
 {
-	struct virtio_blk *vblk = vbr->vblk;
 	struct request *req = vbr->req;
 	int error = virtblk_result(vbr);
 
@@ -231,90 +123,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
 		req->errors = (error != 0);
 	}
 
-	__blk_end_request_all(req, error);
-	mempool_free(vbr, vblk->pool);
-}
-
-static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (vbr->flags & VBLK_REQ_DATA) {
-		/* Send out the actual write data */
-		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
-		/* Send out a flush before end the bio */
-		vbr->flags &= ~VBLK_REQ_DATA;
-		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_done(struct virtblk_req *vbr)
-{
-	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
-		virtblk_bio_flush_done(vbr);
-	else
-		virtblk_bio_data_done(vbr);
+	blk_mq_end_io(req, error);
 }
 
 static void virtblk_done(struct virtqueue *vq)
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
-	bool bio_done = false, req_done = false;
+	bool req_done = false;
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
 
-	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+	spin_lock_irqsave(&vblk->vq_lock, flags);
 	do {
 		virtqueue_disable_cb(vq);
 		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-			if (vbr->bio) {
-				virtblk_bio_done(vbr);
-				bio_done = true;
-			} else {
-				virtblk_request_done(vbr);
-				req_done = true;
-			}
+			virtblk_request_done(vbr);
+			req_done = true;
 		}
 	} while (!virtqueue_enable_cb(vq));
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
 	/* In case queue is stopped waiting for more buffers. */
 	if (req_done)
-		blk_start_queue(vblk->disk->queue);
-	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
-
-	if (bio_done)
-		wake_up(&vblk->queue_wait);
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
 }
 
-static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
-		   struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 {
+	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct virtblk_req *vbr = req->special;
+	unsigned long flags;
 	unsigned int num;
-	struct virtblk_req *vbr;
+	const bool last = (req->cmd_flags & REQ_END) != 0;
 
-	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
-	if (!vbr)
-		/* When another request finishes we'll try again. */
-		return false;
+	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
 	vbr->req = req;
-	vbr->bio = NULL;
 	if (req->cmd_flags & REQ_FLUSH) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
@@ -342,7 +187,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
+	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(vbr->req) == WRITE)
 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -350,63 +195,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 	}
 
-	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
-		mempool_free(vbr, vblk->pool);
-		return false;
-	}
-
-	return true;
-}
-
-static void virtblk_request(struct request_queue *q)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct request *req;
-	unsigned int issued = 0;
-
-	while ((req = blk_peek_request(q)) != NULL) {
-		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
-		/* If this request fails, stop queue and wait for something to
-		   finish to restart it. */
-		if (!do_req(q, vblk, req)) {
-			blk_stop_queue(q);
-			break;
-		}
-		blk_start_request(req);
-		issued++;
-	}
-
-	if (issued)
+	spin_lock_irqsave(&vblk->vq_lock, flags);
+	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+		spin_unlock_irqrestore(&vblk->vq_lock, flags);
+		blk_mq_stop_hw_queue(hctx);
 		virtqueue_kick(vblk->vq);
-}
-
-static void virtblk_make_request(struct request_queue *q, struct bio *bio)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct virtblk_req *vbr;
-
-	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
-
-	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
-	if (!vbr) {
-		bio_endio(bio, -ENOMEM);
-		return;
+		return BLK_MQ_RQ_QUEUE_BUSY;
 	}
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
 
-	vbr->bio = bio;
-	vbr->flags = 0;
-	if (bio->bi_rw & REQ_FLUSH)
-		vbr->flags |= VBLK_REQ_FLUSH;
-	if (bio->bi_rw & REQ_FUA)
-		vbr->flags |= VBLK_REQ_FUA;
-	if (bio->bi_iter.bi_size)
-		vbr->flags |= VBLK_REQ_DATA;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
-		virtblk_bio_send_flush(vbr);
-	else
-		virtblk_bio_send_data(vbr);
+	if (last)
+		virtqueue_kick(vblk->vq);
+	return BLK_MQ_RQ_QUEUE_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
@@ -680,12 +480,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
+static struct blk_mq_ops virtio_mq_ops = {
+	.queue_rq	= virtio_queue_rq,
+	.map_queue	= blk_mq_map_queue,
+	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
+	.free_hctx	= blk_mq_free_single_hw_queue,
+};
+
+static struct blk_mq_reg virtio_mq_reg = {
+	.ops		= &virtio_mq_ops,
+	.nr_hw_queues	= 1,
+	.queue_depth	= 64,
+	.numa_node	= NUMA_NO_NODE,
+	.flags		= BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
+			     struct request *rq, unsigned int nr)
+{
+	struct virtio_blk *vblk = data;
+	struct virtblk_req *vbr = rq->special;
+
+	sg_init_table(vbr->sg, vblk->sg_elems);
+}
+
 static int virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
 	struct request_queue *q;
 	int err, index;
-	int pool_size;
 
 	u64 cap;
 	u32 v, blk_size, sg_elems, opt_io_size;
@@ -709,17 +532,14 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	/* We need an extra sg elements at head and tail. */
 	sg_elems += 2;
-	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
-				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
+	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
 	if (!vblk) {
 		err = -ENOMEM;
 		goto out_free_index;
 	}
 
-	init_waitqueue_head(&vblk->queue_wait);
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
-	sg_init_table(vblk->sg, vblk->sg_elems);
 	mutex_init(&vblk->config_lock);
 
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -728,31 +548,27 @@ static int virtblk_probe(struct virtio_device *vdev)
 	err = init_vq(vblk);
 	if (err)
 		goto out_free_vblk;
-
-	pool_size = sizeof(struct virtblk_req);
-	if (use_bio)
-		pool_size += sizeof(struct scatterlist) * sg_elems;
-	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
-	if (!vblk->pool) {
-		err = -ENOMEM;
-		goto out_free_vq;
-	}
+	spin_lock_init(&vblk->vq_lock);
 
 	/* FIXME: How many partitions?  How long is a piece of string? */
 	vblk->disk = alloc_disk(1 << PART_BITS);
 	if (!vblk->disk) {
 		err = -ENOMEM;
-		goto out_mempool;
+		goto out_free_vq;
 	}
 
-	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
+	virtio_mq_reg.cmd_size =
+		sizeof(struct virtblk_req) +
+		sizeof(struct scatterlist) * sg_elems;
+
+	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
 	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
-	if (use_bio)
-		blk_queue_make_request(q, virtblk_make_request);
+	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
+
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -857,8 +673,6 @@ out_del_disk:
 	blk_cleanup_queue(vblk->disk->queue);
 out_put_disk:
 	put_disk(vblk->disk);
-out_mempool:
-	mempool_destroy(vblk->pool);
 out_free_vq:
 	vdev->config->del_vqs(vdev);
 out_free_vblk:
@@ -890,7 +704,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 
 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
 	put_disk(vblk->disk);
-	mempool_destroy(vblk->pool);
 	vdev->config->del_vqs(vdev);
 	kfree(vblk);
 
@@ -914,10 +727,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 
 	flush_work(&vblk->config_work);
 
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	blk_stop_queue(vblk->disk->queue);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-	blk_sync_queue(vblk->disk->queue);
+	blk_mq_stop_hw_queues(vblk->disk->queue);
 
 	vdev->config->del_vqs(vdev);
 	return 0;
@@ -930,11 +740,9 @@ static int virtblk_restore(struct virtio_device *vdev)
 
 	vblk->config_enable = true;
 	ret = init_vq(vdev->priv);
-	if (!ret) {
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-		blk_start_queue(vblk->disk->queue);
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-	}
+	if (!ret)
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+
 	return ret;
 }
 #endif
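
For readers new to blk-mq, the driver-side contract the patch relies on
can be summarized like this (a hedged sketch with hypothetical helpers,
not part of the patch):

	static int sketch_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
	{
		if (!ring_has_room(hctx)) {	/* hypothetical helper */
			/* Park this hw queue; blk-mq re-dispatches rq later. */
			blk_mq_stop_hw_queue(hctx);
			return BLK_MQ_RQ_QUEUE_BUSY;
		}
		issue_to_hardware(rq);		/* hypothetical helper */
		return BLK_MQ_RQ_QUEUE_OK;
	}

The completion side then ends each request and restarts any parked
queues, which is exactly what virtblk_done() does above with
blk_mq_end_io() and blk_mq_start_stopped_hw_queues().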

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] virtio_blk: blk-mq support
  2013-11-01 16:45           ` Christoph Hellwig
@ 2013-11-01 17:00             ` Jens Axboe
  0 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2013-11-01 17:00 UTC (permalink / raw)
  To: Christoph Hellwig, Asias He; +Cc: Rusty Russell, linux-kernel

On 11/01/2013 10:45 AM, Christoph Hellwig wrote:
> Updated version to address Asias' comments and rebased ontop of
> block/for-next with the immutable bio changes:

Thanks, I updated the blk-mq/drivers branch to have the for-3.13/core
immutable changes and replaced the existing virtio-blk patch.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-11-01 17:00 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-10-25 13:04 [PATCH 0/2] block multiqueue support for virtio-blk Christoph Hellwig
2013-10-25 13:05 ` [PATCH 1/2] blk-mq: add blk_mq_stop_hw_queues Christoph Hellwig
2013-10-25 13:44   ` Jens Axboe
2013-10-25 13:05 ` [PATCH 2/2] virtio_blk: blk-mq support Christoph Hellwig
2013-10-25 13:51   ` Jens Axboe
2013-10-25 16:06   ` Asias He
2013-10-28  2:47   ` Rusty Russell
2013-10-28  8:52     ` Christoph Hellwig
2013-10-29 21:34       ` Jens Axboe
2013-10-30  8:51         ` Asias He
2013-11-01 16:45           ` Christoph Hellwig
2013-11-01 17:00             ` Jens Axboe
