linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Bob Liu <bob.liu@oracle.com>, david.vrabel@citrix.com
Cc: xen-devel@lists.xen.org, david.vrabel@citrix.com,
	linux-kernel@vger.kernel.org, roger.pau@citrix.com,
	felipe.franciosi@citrix.com, axboe@fb.com, hch@infradead.org,
	avanzini.arianna@gmail.com, rafal.mielniczuk@citrix.com,
	boris.ostrovsky@oracle.com, jonathan.davies@citrix.com
Subject: Re: [PATCH v3 1/9] xen-blkfront: convert to blk-mq APIs
Date: Wed, 23 Sep 2015 16:31:21 -0400	[thread overview]
Message-ID: <20150923203121.GA30295@l.oracle.com> (raw)
In-Reply-To: <1441456782-31318-2-git-send-email-bob.liu@oracle.com>

On Sat, Sep 05, 2015 at 08:39:34PM +0800, Bob Liu wrote:
> Note: This patch is based on original work of Arianna's internship for
> GNOME's Outreach Program for Women.
> 
> Only one hardware queue is used now, so there is no significant
> performance change.
> 
> The legacy non-mq code is deleted completely which is the same as other
> drivers like virtio, mtip, and nvme.
> 
> Also dropped one unnecessary holding of info->io_lock when calling
> blk_mq_stop_hw_queues().
> 
> Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Acked-by: Jens Axboe <axboe@fb.com>
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>

Odd.

This should have gone into Linux 4.3, but apparently it did not? I thought
I remembered seeing it there.

Anyhow I will put this in my queue for 4.4.
> ---
>  drivers/block/xen-blkfront.c |  146 +++++++++++++++++-------------------------
>  1 file changed, 60 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 7a8a73f..5dd591d 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -37,6 +37,7 @@
>  
>  #include <linux/interrupt.h>
>  #include <linux/blkdev.h>
> +#include <linux/blk-mq.h>
>  #include <linux/hdreg.h>
>  #include <linux/cdrom.h>
>  #include <linux/module.h>
> @@ -148,6 +149,7 @@ struct blkfront_info
>  	unsigned int feature_persistent:1;
>  	unsigned int max_indirect_segments;
>  	int is_ready;
> +	struct blk_mq_tag_set tag_set;
>  };
>  
>  static unsigned int nr_minors;
> @@ -617,54 +619,41 @@ static inline bool blkif_request_flush_invalid(struct request *req,
>  		 !(info->feature_flush & REQ_FUA)));
>  }
>  
> -/*
> - * do_blkif_request
> - *  read a block; request is in a request queue
> - */
> -static void do_blkif_request(struct request_queue *rq)
> +static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
> +			   const struct blk_mq_queue_data *qd)
>  {
> -	struct blkfront_info *info = NULL;
> -	struct request *req;
> -	int queued;
> -
> -	pr_debug("Entered do_blkif_request\n");
> -
> -	queued = 0;
> +	struct blkfront_info *info = qd->rq->rq_disk->private_data;
>  
> -	while ((req = blk_peek_request(rq)) != NULL) {
> -		info = req->rq_disk->private_data;
> -
> -		if (RING_FULL(&info->ring))
> -			goto wait;
> +	blk_mq_start_request(qd->rq);
> +	spin_lock_irq(&info->io_lock);
> +	if (RING_FULL(&info->ring))
> +		goto out_busy;
>  
> -		blk_start_request(req);
> +	if (blkif_request_flush_invalid(qd->rq, info))
> +		goto out_err;
>  
> -		if (blkif_request_flush_invalid(req, info)) {
> -			__blk_end_request_all(req, -EOPNOTSUPP);
> -			continue;
> -		}
> +	if (blkif_queue_request(qd->rq))
> +		goto out_busy;
>  
> -		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
> -			 "(%u/%u) [%s]\n",
> -			 req, req->cmd, (unsigned long)blk_rq_pos(req),
> -			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
> -			 rq_data_dir(req) ? "write" : "read");
> -
> -		if (blkif_queue_request(req)) {
> -			blk_requeue_request(rq, req);
> -wait:
> -			/* Avoid pointless unplugs. */
> -			blk_stop_queue(rq);
> -			break;
> -		}
> +	flush_requests(info);
> +	spin_unlock_irq(&info->io_lock);
> +	return BLK_MQ_RQ_QUEUE_OK;
>  
> -		queued++;
> -	}
> +out_err:
> +	spin_unlock_irq(&info->io_lock);
> +	return BLK_MQ_RQ_QUEUE_ERROR;
>  
> -	if (queued != 0)
> -		flush_requests(info);
> +out_busy:
> +	spin_unlock_irq(&info->io_lock);
> +	blk_mq_stop_hw_queue(hctx);
> +	return BLK_MQ_RQ_QUEUE_BUSY;
>  }
>  
> +static struct blk_mq_ops blkfront_mq_ops = {
> +	.queue_rq = blkif_queue_rq,
> +	.map_queue = blk_mq_map_queue,
> +};
> +
>  static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
>  				unsigned int physical_sector_size,
>  				unsigned int segments)
> @@ -672,9 +661,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
>  	struct request_queue *rq;
>  	struct blkfront_info *info = gd->private_data;
>  
> -	rq = blk_init_queue(do_blkif_request, &info->io_lock);
> -	if (rq == NULL)
> +	memset(&info->tag_set, 0, sizeof(info->tag_set));
> +	info->tag_set.ops = &blkfront_mq_ops;
> +	info->tag_set.nr_hw_queues = 1;
> +	info->tag_set.queue_depth =  BLK_RING_SIZE(info);
> +	info->tag_set.numa_node = NUMA_NO_NODE;
> +	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
> +	info->tag_set.cmd_size = 0;
> +	info->tag_set.driver_data = info;
> +
> +	if (blk_mq_alloc_tag_set(&info->tag_set))
>  		return -1;
> +	rq = blk_mq_init_queue(&info->tag_set);
> +	if (IS_ERR(rq)) {
> +		blk_mq_free_tag_set(&info->tag_set);
> +		return -1;
> +	}
>  
>  	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
>  
> @@ -902,19 +904,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
>  static void xlvbd_release_gendisk(struct blkfront_info *info)
>  {
>  	unsigned int minor, nr_minors;
> -	unsigned long flags;
>  
>  	if (info->rq == NULL)
>  		return;
>  
> -	spin_lock_irqsave(&info->io_lock, flags);
> -
>  	/* No more blkif_request(). */
> -	blk_stop_queue(info->rq);
> +	blk_mq_stop_hw_queues(info->rq);
>  
>  	/* No more gnttab callback work. */
>  	gnttab_cancel_free_callback(&info->callback);
> -	spin_unlock_irqrestore(&info->io_lock, flags);
>  
>  	/* Flush gnttab callback work. Must be done with no locks held. */
>  	flush_work(&info->work);
> @@ -926,20 +924,18 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
>  	xlbd_release_minors(minor, nr_minors);
>  
>  	blk_cleanup_queue(info->rq);
> +	blk_mq_free_tag_set(&info->tag_set);
>  	info->rq = NULL;
>  
>  	put_disk(info->gd);
>  	info->gd = NULL;
>  }
>  
> +/* Must be called with io_lock held */
>  static void kick_pending_request_queues(struct blkfront_info *info)
>  {
> -	if (!RING_FULL(&info->ring)) {
> -		/* Re-enable calldowns. */
> -		blk_start_queue(info->rq);
> -		/* Kick things off immediately. */
> -		do_blkif_request(info->rq);
> -	}
> +	if (!RING_FULL(&info->ring))
> +		blk_mq_start_stopped_hw_queues(info->rq, true);
>  }
>  
>  static void blkif_restart_queue(struct work_struct *work)
> @@ -964,7 +960,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>  		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
>  	/* No more blkif_request(). */
>  	if (info->rq)
> -		blk_stop_queue(info->rq);
> +		blk_mq_stop_hw_queues(info->rq);
>  
>  	/* Remove all persistent grants */
>  	if (!list_empty(&info->grants)) {
> @@ -1147,7 +1143,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  	RING_IDX i, rp;
>  	unsigned long flags;
>  	struct blkfront_info *info = (struct blkfront_info *)dev_id;
> -	int error;
>  
>  	spin_lock_irqsave(&info->io_lock, flags);
>  
> @@ -1188,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  			continue;
>  		}
>  
> -		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
> +		req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
>  		switch (bret->operation) {
>  		case BLKIF_OP_DISCARD:
>  			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
>  				struct request_queue *rq = info->rq;
>  				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
>  					   info->gd->disk_name, op_name(bret->operation));
> -				error = -EOPNOTSUPP;
> +				req->errors = -EOPNOTSUPP;
>  				info->feature_discard = 0;
>  				info->feature_secdiscard = 0;
>  				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
>  				queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
>  			}
> -			__blk_end_request_all(req, error);
> +			blk_mq_complete_request(req);
>  			break;
>  		case BLKIF_OP_FLUSH_DISKCACHE:
>  		case BLKIF_OP_WRITE_BARRIER:
>  			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
>  				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
>  				       info->gd->disk_name, op_name(bret->operation));
> -				error = -EOPNOTSUPP;
> +				req->errors = -EOPNOTSUPP;
>  			}
>  			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
>  				     info->shadow[id].req.u.rw.nr_segments == 0)) {
>  				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
>  				       info->gd->disk_name, op_name(bret->operation));
> -				error = -EOPNOTSUPP;
> +				req->errors = -EOPNOTSUPP;
>  			}
> -			if (unlikely(error)) {
> -				if (error == -EOPNOTSUPP)
> -					error = 0;
> +			if (unlikely(req->errors)) {
> +				if (req->errors == -EOPNOTSUPP)
> +					req->errors = 0;
>  				info->feature_flush = 0;
>  				xlvbd_flush(info);
>  			}
> @@ -1229,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
>  					"request: %x\n", bret->status);
>  
> -			__blk_end_request_all(req, error);
> +			blk_mq_complete_request(req);
>  			break;
>  		default:
>  			BUG();
> @@ -1558,28 +1553,6 @@ static int blkif_recover(struct blkfront_info *info)
>  
>  	kfree(copy);
>  
> -	/*
> -	 * Empty the queue, this is important because we might have
> -	 * requests in the queue with more segments than what we
> -	 * can handle now.
> -	 */
> -	spin_lock_irq(&info->io_lock);
> -	while ((req = blk_fetch_request(info->rq)) != NULL) {
> -		if (req->cmd_flags &
> -		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> -			list_add(&req->queuelist, &requests);
> -			continue;
> -		}
> -		merge_bio.head = req->bio;
> -		merge_bio.tail = req->biotail;
> -		bio_list_merge(&bio_list, &merge_bio);
> -		req->bio = NULL;
> -		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
> -			pr_alert("diskcache flush request found!\n");
> -		__blk_end_request_all(req, 0);
> -	}
> -	spin_unlock_irq(&info->io_lock);
> -
>  	xenbus_switch_state(info->xbdev, XenbusStateConnected);
>  
>  	spin_lock_irq(&info->io_lock);
> @@ -1594,9 +1567,10 @@ static int blkif_recover(struct blkfront_info *info)
>  		/* Requeue pending requests (flush or discard) */
>  		list_del_init(&req->queuelist);
>  		BUG_ON(req->nr_phys_segments > segs);
> -		blk_requeue_request(info->rq, req);
> +		blk_mq_requeue_request(req);
>  	}
>  	spin_unlock_irq(&info->io_lock);
> +	blk_mq_kick_requeue_list(info->rq);
>  
>  	while ((bio = bio_list_pop(&bio_list)) != NULL) {
>  		/* Traverse the list of pending bios and re-queue them */
> -- 
> 1.7.10.4
> 

  reply	other threads:[~2015-09-23 21:04 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-05 12:39 [PATCH v3 0/9] xen-block: support multi hardware-queues/rings Bob Liu
2015-09-05 12:39 ` [PATCH v3 1/9] xen-blkfront: convert to blk-mq APIs Bob Liu
2015-09-23 20:31   ` Konrad Rzeszutek Wilk [this message]
2015-09-23 21:12     ` Konrad Rzeszutek Wilk
2015-09-05 12:39 ` [PATCH v3 2/9] xen-block: add document for mutli hardware queues/rings Bob Liu
2015-09-23 20:32   ` Konrad Rzeszutek Wilk
2015-10-02 16:04   ` Roger Pau Monné
2015-10-02 16:12     ` [Xen-devel] " Wei Liu
2015-10-02 16:22       ` Roger Pau Monné
2015-10-02 23:55         ` Bob Liu
2015-09-05 12:39 ` [PATCH v3 3/9] xen/blkfront: separate per ring information out of device info Bob Liu
2015-10-02 17:02   ` Roger Pau Monné
2015-10-03  0:34     ` Bob Liu
2015-10-05 15:17       ` Roger Pau Monné
2015-10-10  8:30     ` Bob Liu
2015-10-19  9:42       ` Roger Pau Monné
2015-09-05 12:39 ` [PATCH v3 4/9] xen/blkfront: pseudo support for multi hardware queues/rings Bob Liu
2015-10-05 10:52   ` Roger Pau Monné
2015-10-07 10:28     ` Bob Liu
2015-09-05 12:39 ` [PATCH v3 5/9] xen/blkfront: convert per device io_lock to per ring ring_lock Bob Liu
2015-10-05 14:13   ` Roger Pau Monné
2015-10-07 10:34     ` Bob Liu
2015-09-05 12:39 ` [PATCH v3 6/9] xen/blkfront: negotiate the number of hw queues/rings with backend Bob Liu
2015-10-05 14:40   ` Roger Pau Monné
2015-10-07 10:39     ` Bob Liu
2015-10-07 11:46       ` Roger Pau Monné
2015-10-07 12:19         ` Bob Liu
2015-09-05 12:39 ` [PATCH v3 7/9] xen/blkback: separate ring information out of struct xen_blkif Bob Liu
2015-10-05 14:55   ` Roger Pau Monné
2015-10-07 10:41     ` Bob Liu
2015-10-10  4:08     ` Bob Liu
2015-10-19  9:36       ` Roger Pau Monné
2015-10-19 10:03         ` Bob Liu
2015-10-05 14:55   ` Roger Pau Monné
2015-09-05 12:39 ` [PATCH v3 8/9] xen/blkback: pseudo support for multi hardware queues/rings Bob Liu
2015-10-05 15:08   ` Roger Pau Monné
2015-10-07 10:50     ` Bob Liu
2015-10-07 11:49       ` Roger Pau Monné
2015-09-05 12:39 ` [PATCH v3 9/9] xen/blkback: get number of hardware queues/rings from blkfront Bob Liu
2015-10-05 15:15   ` Roger Pau Monné
2015-10-07 10:54     ` Bob Liu
2015-10-02  9:57 ` [PATCH v3 0/9] xen-block: support multi hardware-queues/rings Rafal Mielniczuk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150923203121.GA30295@l.oracle.com \
    --to=konrad.wilk@oracle.com \
    --cc=avanzini.arianna@gmail.com \
    --cc=axboe@fb.com \
    --cc=bob.liu@oracle.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=david.vrabel@citrix.com \
    --cc=felipe.franciosi@citrix.com \
    --cc=hch@infradead.org \
    --cc=jonathan.davies@citrix.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rafal.mielniczuk@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).