From mboxrd@z Thu Jan 1 00:00:00 1970 From: Konrad Rzeszutek Wilk Subject: Re: [PATCH v3 1/9] xen-blkfront: convert to blk-mq APIs Date: Wed, 23 Sep 2015 16:31:21 -0400 Message-ID: <20150923203121.GA30295__38219.7340987206$1443042363$gmane$org@l.oracle.com> References: <1441456782-31318-1-git-send-email-bob.liu@oracle.com> <1441456782-31318-2-git-send-email-bob.liu@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Content-Disposition: inline In-Reply-To: <1441456782-31318-2-git-send-email-bob.liu@oracle.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Bob Liu Cc: hch@infradead.org, felipe.franciosi@citrix.com, rafal.mielniczuk@citrix.com, jonathan.davies@citrix.com, linux-kernel@vger.kernel.org, xen-devel@lists.xen.org, axboe@fb.com, david.vrabel@citrix.com, avanzini.arianna@gmail.com, boris.ostrovsky@oracle.com, roger.pau@citrix.com List-Id: xen-devel@lists.xenproject.org On Sat, Sep 05, 2015 at 08:39:34PM +0800, Bob Liu wrote: > Note: This patch is based on original work of Arianna's internship for > GNOME's Outreach Program for Women. > > Only one hardware queue is used now, so there is no significant > performance change. > > The legacy non-mq code is deleted completely, which is the same as other > drivers like virtio, mtip, and nvme. > > Also dropped one unnecessary holding of info->io_lock when calling > blk_mq_stop_hw_queues(). > > Signed-off-by: Arianna Avanzini > Signed-off-by: Bob Liu > Reviewed-by: Christoph Hellwig > Acked-by: Jens Axboe > Signed-off-by: David Vrabel Odd. This should have gone into Linux 4.3, but it did not? I think I remember seeing it there. Anyhow, I will put this in my queue for 4.4. 
> --- > drivers/block/xen-blkfront.c | 146 +++++++++++++++++------------------------- > 1 file changed, 60 insertions(+), 86 deletions(-) > > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index 7a8a73f..5dd591d 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -37,6 +37,7 @@ > > #include > #include > +#include > #include > #include > #include > @@ -148,6 +149,7 @@ struct blkfront_info > unsigned int feature_persistent:1; > unsigned int max_indirect_segments; > int is_ready; > + struct blk_mq_tag_set tag_set; > }; > > static unsigned int nr_minors; > @@ -617,54 +619,41 @@ static inline bool blkif_request_flush_invalid(struct request *req, > !(info->feature_flush & REQ_FUA))); > } > > -/* > - * do_blkif_request > - * read a block; request is in a request queue > - */ > -static void do_blkif_request(struct request_queue *rq) > +static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, > + const struct blk_mq_queue_data *qd) > { > - struct blkfront_info *info = NULL; > - struct request *req; > - int queued; > - > - pr_debug("Entered do_blkif_request\n"); > - > - queued = 0; > + struct blkfront_info *info = qd->rq->rq_disk->private_data; > > - while ((req = blk_peek_request(rq)) != NULL) { > - info = req->rq_disk->private_data; > - > - if (RING_FULL(&info->ring)) > - goto wait; > + blk_mq_start_request(qd->rq); > + spin_lock_irq(&info->io_lock); > + if (RING_FULL(&info->ring)) > + goto out_busy; > > - blk_start_request(req); > + if (blkif_request_flush_invalid(qd->rq, info)) > + goto out_err; > > - if (blkif_request_flush_invalid(req, info)) { > - __blk_end_request_all(req, -EOPNOTSUPP); > - continue; > - } > + if (blkif_queue_request(qd->rq)) > + goto out_busy; > > - pr_debug("do_blk_req %p: cmd %p, sec %lx, " > - "(%u/%u) [%s]\n", > - req, req->cmd, (unsigned long)blk_rq_pos(req), > - blk_rq_cur_sectors(req), blk_rq_sectors(req), > - rq_data_dir(req) ? 
"write" : "read"); > - > - if (blkif_queue_request(req)) { > - blk_requeue_request(rq, req); > -wait: > - /* Avoid pointless unplugs. */ > - blk_stop_queue(rq); > - break; > - } > + flush_requests(info); > + spin_unlock_irq(&info->io_lock); > + return BLK_MQ_RQ_QUEUE_OK; > > - queued++; > - } > +out_err: > + spin_unlock_irq(&info->io_lock); > + return BLK_MQ_RQ_QUEUE_ERROR; > > - if (queued != 0) > - flush_requests(info); > +out_busy: > + spin_unlock_irq(&info->io_lock); > + blk_mq_stop_hw_queue(hctx); > + return BLK_MQ_RQ_QUEUE_BUSY; > } > > +static struct blk_mq_ops blkfront_mq_ops = { > + .queue_rq = blkif_queue_rq, > + .map_queue = blk_mq_map_queue, > +}; > + > static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > unsigned int physical_sector_size, > unsigned int segments) > @@ -672,9 +661,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > struct request_queue *rq; > struct blkfront_info *info = gd->private_data; > > - rq = blk_init_queue(do_blkif_request, &info->io_lock); > - if (rq == NULL) > + memset(&info->tag_set, 0, sizeof(info->tag_set)); > + info->tag_set.ops = &blkfront_mq_ops; > + info->tag_set.nr_hw_queues = 1; > + info->tag_set.queue_depth = BLK_RING_SIZE(info); > + info->tag_set.numa_node = NUMA_NO_NODE; > + info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; > + info->tag_set.cmd_size = 0; > + info->tag_set.driver_data = info; > + > + if (blk_mq_alloc_tag_set(&info->tag_set)) > return -1; > + rq = blk_mq_init_queue(&info->tag_set); > + if (IS_ERR(rq)) { > + blk_mq_free_tag_set(&info->tag_set); > + return -1; > + } > > queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); > > @@ -902,19 +904,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > static void xlvbd_release_gendisk(struct blkfront_info *info) > { > unsigned int minor, nr_minors; > - unsigned long flags; > > if (info->rq == NULL) > return; > > - spin_lock_irqsave(&info->io_lock, flags); > - > /* No more blkif_request(). 
*/ > - blk_stop_queue(info->rq); > + blk_mq_stop_hw_queues(info->rq); > > /* No more gnttab callback work. */ > gnttab_cancel_free_callback(&info->callback); > - spin_unlock_irqrestore(&info->io_lock, flags); > > /* Flush gnttab callback work. Must be done with no locks held. */ > flush_work(&info->work); > @@ -926,20 +924,18 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) > xlbd_release_minors(minor, nr_minors); > > blk_cleanup_queue(info->rq); > + blk_mq_free_tag_set(&info->tag_set); > info->rq = NULL; > > put_disk(info->gd); > info->gd = NULL; > } > > +/* Must be called with io_lock held */ > static void kick_pending_request_queues(struct blkfront_info *info) > { > - if (!RING_FULL(&info->ring)) { > - /* Re-enable calldowns. */ > - blk_start_queue(info->rq); > - /* Kick things off immediately. */ > - do_blkif_request(info->rq); > - } > + if (!RING_FULL(&info->ring)) > + blk_mq_start_stopped_hw_queues(info->rq, true); > } > > static void blkif_restart_queue(struct work_struct *work) > @@ -964,7 +960,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) > BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; > /* No more blkif_request(). */ > if (info->rq) > - blk_stop_queue(info->rq); > + blk_mq_stop_hw_queues(info->rq); > > /* Remove all persistent grants */ > if (!list_empty(&info->grants)) { > @@ -1147,7 +1143,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > RING_IDX i, rp; > unsigned long flags; > struct blkfront_info *info = (struct blkfront_info *)dev_id; > - int error; > > spin_lock_irqsave(&info->io_lock, flags); > > @@ -1188,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > continue; > } > > - error = (bret->status == BLKIF_RSP_OKAY) ? 
0 : -EIO; > switch (bret->operation) { > case BLKIF_OP_DISCARD: > if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { > struct request_queue *rq = info->rq; > printk(KERN_WARNING "blkfront: %s: %s op failed\n", > info->gd->disk_name, op_name(bret->operation)); > - error = -EOPNOTSUPP; > + req->errors = -EOPNOTSUPP; > info->feature_discard = 0; > info->feature_secdiscard = 0; > queue_flag_clear(QUEUE_FLAG_DISCARD, rq); > queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); > } > - __blk_end_request_all(req, error); > + blk_mq_complete_request(req); > break; > case BLKIF_OP_FLUSH_DISKCACHE: > case BLKIF_OP_WRITE_BARRIER: > if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { > printk(KERN_WARNING "blkfront: %s: %s op failed\n", > info->gd->disk_name, op_name(bret->operation)); > - error = -EOPNOTSUPP; > + req->errors = -EOPNOTSUPP; > } > if (unlikely(bret->status == BLKIF_RSP_ERROR && > info->shadow[id].req.u.rw.nr_segments == 0)) { > printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", > info->gd->disk_name, op_name(bret->operation)); > - error = -EOPNOTSUPP; > + req->errors = -EOPNOTSUPP; > } > - if (unlikely(error)) { > - if (error == -EOPNOTSUPP) > - error = 0; > + if (unlikely(req->errors)) { > + if (req->errors == -EOPNOTSUPP) > + req->errors = 0; > info->feature_flush = 0; > xlvbd_flush(info); > } > @@ -1229,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " > "request: %x\n", bret->status); > > - __blk_end_request_all(req, error); > + blk_mq_complete_request(req); > break; > default: > BUG(); > @@ -1558,28 +1553,6 @@ static int blkif_recover(struct blkfront_info *info) > > kfree(copy); > > - /* > - * Empty the queue, this is important because we might have > - * requests in the queue with more segments than what we > - * can handle now. 
> - */ > - spin_lock_irq(&info->io_lock); > - while ((req = blk_fetch_request(info->rq)) != NULL) { > - if (req->cmd_flags & > - (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { > - list_add(&req->queuelist, &requests); > - continue; > - } > - merge_bio.head = req->bio; > - merge_bio.tail = req->biotail; > - bio_list_merge(&bio_list, &merge_bio); > - req->bio = NULL; > - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) > - pr_alert("diskcache flush request found!\n"); > - __blk_end_request_all(req, 0); > - } > - spin_unlock_irq(&info->io_lock); > - > xenbus_switch_state(info->xbdev, XenbusStateConnected); > > spin_lock_irq(&info->io_lock); > @@ -1594,9 +1567,10 @@ static int blkif_recover(struct blkfront_info *info) > /* Requeue pending requests (flush or discard) */ > list_del_init(&req->queuelist); > BUG_ON(req->nr_phys_segments > segs); > - blk_requeue_request(info->rq, req); > + blk_mq_requeue_request(req); > } > spin_unlock_irq(&info->io_lock); > + blk_mq_kick_requeue_list(info->rq); > > while ((bio = bio_list_pop(&bio_list)) != NULL) { > /* Traverse the list of pending bios and re-queue them */ > -- > 1.7.10.4 >