From: Bob Liu <bob.liu@oracle.com>
To: xen-devel@lists.xen.org
Cc: david.vrabel@citrix.com, linux-kernel@vger.kernel.org, roger.pau@citrix.com,
	konrad.wilk@oracle.com, felipe.franciosi@citrix.com, axboe@fb.com,
	hch@infradead.org, avanzini.arianna@gmail.com, Bob Liu <bob.liu@oracle.com>
Subject: [PATCH 06/10] xen/blkfront: pseudo support for multi hardware queues
Date: Sun, 15 Feb 2015 16:19:01 +0800
Message-Id: <1423988345-4005-7-git-send-email-bob.liu@oracle.com>
X-Mailer: git-send-email 1.7.10.4
In-Reply-To: <1423988345-4005-1-git-send-email-bob.liu@oracle.com>
References: <1423988345-4005-1-git-send-email-bob.liu@oracle.com>

Preparatory patch for multi hardware queues (rings): convert the single
embedded blkfront_ring_info into an array of nr_rings entries. The number
of rings is unconditionally set to 1 for now, so behaviour does not change.

Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
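Note (not part of the patch itself): the core idea is that struct blkfront_info
stops embedding a single blkfront_ring_info and instead owns an array of
nr_rings of them, allocated in blkfront_probe() and torn down per ring. Below
is a minimal, stand-alone C sketch of that ownership model, using simplified,
hypothetical stand-in types rather than the real kernel structures; it only
illustrates the allocate/iterate/free pattern the diff applies throughout.

#include <stdlib.h>

/* Simplified stand-ins for blkfront_ring_info / blkfront_info. */
struct ring_info {
	unsigned int id;		/* per-ring state (ring pages, shadow, ...) */
};

struct dev_info {
	unsigned int nr_rings;		/* how many hardware queues/rings */
	struct ring_info *rinfo;	/* array of nr_rings elements */
};

static int dev_probe(struct dev_info *info)
{
	unsigned int i;

	info->nr_rings = 1;		/* hard-coded to 1 by this patch */
	info->rinfo = calloc(info->nr_rings, sizeof(*info->rinfo));
	if (!info->rinfo)
		return -1;

	for (i = 0; i < info->nr_rings; i++)
		info->rinfo[i].id = i;	/* per-ring init, as blkfront_probe() now loops */
	return 0;
}

static void dev_remove(struct dev_info *info)
{
	/* Tear down each ring, then the array, mirroring the per-ring loops in blkif_free(). */
	free(info->rinfo);
	info->rinfo = NULL;
	info->nr_rings = 0;
}

int main(void)
{
	struct dev_info info = { 0 };

	if (dev_probe(&info))
		return 1;
	dev_remove(&info);
	return 0;
}

With nr_rings fixed at 1 the per-ring loops degenerate to the old single-ring
behaviour, which is why this patch is functionally a no-op.
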
 drivers/block/xen-blkfront.c | 408 +++++++++++++++++++++++++------------------
 1 file changed, 234 insertions(+), 174 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index aaa4a0e..d551be0 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -143,7 +143,8 @@ struct blkfront_info {
 	unsigned int max_indirect_segments;
 	int is_ready;
 	struct blk_mq_tag_set tag_set;
-	struct blkfront_ring_info rinfo;
+	struct blkfront_ring_info *rinfo;
+	unsigned int nr_rings;
 };
 
 static unsigned int nr_minors;
@@ -176,7 +177,8 @@ static DEFINE_SPINLOCK(minor_lock);
 #define INDIRECT_GREFS(_segs) \
 	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
-static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo, unsigned int segs);
+static int blkfront_gather_indirect(struct blkfront_info *info);
 
 static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
 {
@@ -656,7 +658,7 @@ static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 {
 	struct blkfront_info *info = (struct blkfront_info *)data;
 
-	hctx->driver_data = &info->rinfo;
+	hctx->driver_data = &info->rinfo[index];
 	return 0;
 }
 
@@ -915,8 +917,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
 static void xlvbd_release_gendisk(struct blkfront_info *info)
 {
-	unsigned int minor, nr_minors;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
+	unsigned int minor, nr_minors, i;
+	struct blkfront_ring_info *rinfo;
 
 	if (info->rq == NULL)
 		return;
@@ -924,11 +926,14 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 	/* No more blkif_request(). */
 	blk_mq_stop_hw_queues(info->rq);
 
-	/* No more gnttab callback work. */
-	gnttab_cancel_free_callback(&rinfo->callback);
+	for (i = 0; i < info->nr_rings; i++) {
+		rinfo = &info->rinfo[i];
+		/* No more gnttab callback work. */
+		gnttab_cancel_free_callback(&rinfo->callback);
 
-	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_work(&rinfo->work);
+		/* Flush gnttab callback work. Must be done with no locks held. */
+		flush_work(&rinfo->work);
+	}
 
 	del_gendisk(info->gd);
 
@@ -969,8 +974,8 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct grant *persistent_gnt;
 	struct grant *n;
-	int i, j, segs;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
+	int i, j, segs, rindex;
+	struct blkfront_ring_info *rinfo;
 
 	/* Prevent new requests being issued until we fix things up. */
 	info->connected = suspend ?
@@ -979,97 +984,100 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	if (info->rq)
 		blk_mq_stop_hw_queues(info->rq);
 
-	spin_lock_irq(&rinfo->io_lock);
-	/* Remove all persistent grants */
-	if (!list_empty(&rinfo->grants)) {
-		list_for_each_entry_safe(persistent_gnt, n,
-					 &rinfo->grants, node) {
-			list_del(&persistent_gnt->node);
-			if (persistent_gnt->gref != GRANT_INVALID_REF) {
-				gnttab_end_foreign_access(persistent_gnt->gref,
-							  0, 0UL);
-				rinfo->persistent_gnts_c--;
+	for (rindex = 0; rindex < info->nr_rings; rindex++) {
+		rinfo = &info->rinfo[rindex];
+		spin_lock_irq(&rinfo->io_lock);
+		/* Remove all persistent grants */
+		if (!list_empty(&rinfo->grants)) {
+			list_for_each_entry_safe(persistent_gnt, n,
+						 &rinfo->grants, node) {
+				list_del(&persistent_gnt->node);
+				if (persistent_gnt->gref != GRANT_INVALID_REF) {
+					gnttab_end_foreign_access(persistent_gnt->gref,
+								  0, 0UL);
+					rinfo->persistent_gnts_c--;
+				}
+				if (info->feature_persistent)
+					__free_page(pfn_to_page(persistent_gnt->pfn));
+				kfree(persistent_gnt);
 			}
-			if (info->feature_persistent)
-				__free_page(pfn_to_page(persistent_gnt->pfn));
-			kfree(persistent_gnt);
 		}
-	}
-	BUG_ON(rinfo->persistent_gnts_c != 0);
+		BUG_ON(rinfo->persistent_gnts_c != 0);
 
-	/*
-	 * Remove indirect pages, this only happens when using indirect
-	 * descriptors but not persistent grants
-	 */
-	if (!list_empty(&rinfo->indirect_pages)) {
-		struct page *indirect_page, *n;
-
-		BUG_ON(info->feature_persistent);
-		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
-			list_del(&indirect_page->lru);
-			__free_page(indirect_page);
-		}
-	}
-
-	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/*
-		 * Clear persistent grants present in requests already
-		 * on the shared ring
+		 * Remove indirect pages, this only happens when using indirect
+		 * descriptors but not persistent grants
 		 */
-		if (!rinfo->shadow[i].request)
-			goto free_shadow;
-
-		segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
-		       rinfo->shadow[i].req.u.indirect.nr_segments :
-		       rinfo->shadow[i].req.u.rw.nr_segments;
-		for (j = 0; j < segs; j++) {
-			persistent_gnt = rinfo->shadow[i].grants_used[j];
-			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
-			if (info->feature_persistent)
-				__free_page(pfn_to_page(persistent_gnt->pfn));
-			kfree(persistent_gnt);
+		if (!list_empty(&rinfo->indirect_pages)) {
+			struct page *indirect_page, *n;
+
+			BUG_ON(info->feature_persistent);
+			list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
+				list_del(&indirect_page->lru);
+				__free_page(indirect_page);
+			}
 		}
 
-		if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+		for (i = 0; i < BLK_RING_SIZE; i++) {
 			/*
-			 * If this is not an indirect operation don't try to
-			 * free indirect segments
+			 * Clear persistent grants present in requests already
+			 * on the shared ring
 			 */
-			goto free_shadow;
+			if (!rinfo->shadow[i].request)
+				goto free_shadow;
+
+			segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+			       rinfo->shadow[i].req.u.indirect.nr_segments :
+			       rinfo->shadow[i].req.u.rw.nr_segments;
+			for (j = 0; j < segs; j++) {
+				persistent_gnt = rinfo->shadow[i].grants_used[j];
+				gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+				if (info->feature_persistent)
+					__free_page(pfn_to_page(persistent_gnt->pfn));
+				kfree(persistent_gnt);
+			}
 
-		for (j = 0; j < INDIRECT_GREFS(segs); j++) {
-			persistent_gnt = rinfo->shadow[i].indirect_grants[j];
-			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
-			__free_page(pfn_to_page(persistent_gnt->pfn));
-			kfree(persistent_gnt);
-		}
+			if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+				/*
+				 * If this is not an indirect operation don't try to
+				 * free indirect segments
+				 */
+				goto free_shadow;
+
+			for (j = 0; j < INDIRECT_GREFS(segs); j++) {
+				persistent_gnt = rinfo->shadow[i].indirect_grants[j];
+				gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+				__free_page(pfn_to_page(persistent_gnt->pfn));
+				kfree(persistent_gnt);
+			}
 
 free_shadow:
-		kfree(rinfo->shadow[i].grants_used);
-		rinfo->shadow[i].grants_used = NULL;
-		kfree(rinfo->shadow[i].indirect_grants);
-		rinfo->shadow[i].indirect_grants = NULL;
-		kfree(rinfo->shadow[i].sg);
-		rinfo->shadow[i].sg = NULL;
-	}
+			kfree(rinfo->shadow[i].grants_used);
+			rinfo->shadow[i].grants_used = NULL;
+			kfree(rinfo->shadow[i].indirect_grants);
+			rinfo->shadow[i].indirect_grants = NULL;
+			kfree(rinfo->shadow[i].sg);
+			rinfo->shadow[i].sg = NULL;
+		}
 
-	/* No more gnttab callback work. */
-	gnttab_cancel_free_callback(&rinfo->callback);
-	spin_unlock_irq(&rinfo->io_lock);
+		/* No more gnttab callback work. */
+		gnttab_cancel_free_callback(&rinfo->callback);
+		spin_unlock_irq(&rinfo->io_lock);
 
-	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_work(&rinfo->work);
+		/* Flush gnttab callback work. Must be done with no locks held. */
+		flush_work(&rinfo->work);
 
-	/* Free resources associated with old device channel. */
-	if (rinfo->ring_ref != GRANT_INVALID_REF) {
-		gnttab_end_foreign_access(rinfo->ring_ref, 0,
-					  (unsigned long)rinfo->ring.sring);
-		rinfo->ring_ref = GRANT_INVALID_REF;
-		rinfo->ring.sring = NULL;
+		/* Free resources associated with old device channel. */
+		if (rinfo->ring_ref != GRANT_INVALID_REF) {
+			gnttab_end_foreign_access(rinfo->ring_ref, 0,
+						  (unsigned long)rinfo->ring.sring);
+			rinfo->ring_ref = GRANT_INVALID_REF;
+			rinfo->ring.sring = NULL;
+		}
+		if (rinfo->irq)
+			unbind_from_irqhandler(rinfo->irq, rinfo);
+		rinfo->evtchn = rinfo->irq = 0;
 	}
-	if (rinfo->irq)
-		unbind_from_irqhandler(rinfo->irq, rinfo);
-	rinfo->evtchn = rinfo->irq = 0;
 
 }
 
@@ -1265,6 +1273,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 	return IRQ_HANDLED;
 }
+static void destroy_blkring(struct xenbus_device *dev,
+			    struct blkfront_ring_info *rinfo)
+{
+	if (rinfo->irq)
+		unbind_from_irqhandler(rinfo->irq, rinfo);
+	if (rinfo->evtchn)
+		xenbus_free_evtchn(dev, rinfo->evtchn);
+	if (rinfo->ring_ref != GRANT_INVALID_REF)
+		gnttab_end_foreign_access(rinfo->ring_ref, 0, (unsigned long)rinfo->ring.sring);
+	if (rinfo->ring.sring)
+		free_page((unsigned long)rinfo->ring.sring);
+}
 
 static int setup_blkring(struct xenbus_device *dev,
 			 struct blkfront_ring_info *rinfo)
@@ -1305,7 +1325,7 @@ static int setup_blkring(struct xenbus_device *dev,
 
 	return 0;
 fail:
-	blkif_free(rinfo->info, 0);
+	destroy_blkring(dev, rinfo);
 	return err;
 }
 
@@ -1316,31 +1336,40 @@
 {
 	const char *message = NULL;
 	struct xenbus_transaction xbt;
-	int err;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
+	int err, i;
+	struct blkfront_ring_info *rinfo;
 
-	/* Create shared ring, alloc event channel. */
-	err = setup_blkring(dev, rinfo);
-	if (err)
-		goto out;
+	for (i = 0; i < info->nr_rings; i++) {
+		rinfo = &info->rinfo[i];
+		/* Create shared ring, alloc event channel. */
+		err = setup_blkring(dev, rinfo);
+		if (err)
+			goto out;
+	}
 
 again:
 	err = xenbus_transaction_start(&xbt);
 	if (err) {
 		xenbus_dev_fatal(dev, err, "starting transaction");
-		goto destroy_blkring;
+		goto out;
 	}
 
-	err = xenbus_printf(xbt, dev->nodename,
-			    "ring-ref", "%u", rinfo->ring_ref);
-	if (err) {
-		message = "writing ring-ref";
-		goto abort_transaction;
-	}
-	err = xenbus_printf(xbt, dev->nodename,
-			    "event-channel", "%u", rinfo->evtchn);
-	if (err) {
-		message = "writing event-channel";
+	if (info->nr_rings == 1) {
+		rinfo = &info->rinfo[0];
+		err = xenbus_printf(xbt, dev->nodename,
+				    "ring-ref", "%u", rinfo->ring_ref);
+		if (err) {
+			message = "writing ring-ref";
+			goto abort_transaction;
+		}
+		err = xenbus_printf(xbt, dev->nodename,
+				    "event-channel", "%u", rinfo->evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+	} else {
+		/* Not supported at this stage */
 		goto abort_transaction;
 	}
 	err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
@@ -1360,7 +1389,7 @@ again:
 		if (err == -EAGAIN)
 			goto again;
 		xenbus_dev_fatal(dev, err, "completing transaction");
-		goto destroy_blkring;
+		goto out;
 	}
 
 	xenbus_switch_state(dev, XenbusStateInitialised);
@@ -1371,9 +1400,11 @@ again:
 	xenbus_transaction_end(xbt, 1);
 	if (message)
 		xenbus_dev_fatal(dev, err, "%s", message);
- destroy_blkring:
-	blkif_free(info, 0);
  out:
+	while (--i >= 0) {
+		rinfo = &info->rinfo[i];
+		destroy_blkring(dev, rinfo);
+	}
 	return err;
 }
 
@@ -1386,7 +1417,7 @@ again:
 static int blkfront_probe(struct xenbus_device *dev,
 			  const struct xenbus_device_id *id)
 {
-	int err, vdevice, i;
+	int err, vdevice, i, rindex;
 	struct blkfront_info *info;
 	struct blkfront_ring_info *rinfo;
 
@@ -1437,22 +1468,32 @@ static int blkfront_probe(struct xenbus_device *dev,
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
 		return -ENOMEM;
 	}
-
-	rinfo = &info->rinfo;
 
 	mutex_init(&info->mutex);
-	spin_lock_init(&rinfo->io_lock);
 	info->xbdev = dev;
 	info->vdevice = vdevice;
-	INIT_LIST_HEAD(&rinfo->grants);
-	INIT_LIST_HEAD(&rinfo->indirect_pages);
-	rinfo->persistent_gnts_c = 0;
 	info->connected = BLKIF_STATE_DISCONNECTED;
-	rinfo->info = info;
-	INIT_WORK(&rinfo->work, blkif_restart_queue);
-	for (i = 0; i < BLK_RING_SIZE; i++)
-		rinfo->shadow[i].req.u.rw.id = i+1;
-	rinfo->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+	info->nr_rings = 1;
+	info->rinfo = kzalloc(sizeof(*rinfo) * info->nr_rings, GFP_KERNEL);
+	if (!info->rinfo) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
+		kfree(info);
+		return -ENOMEM;
+	}
+
+	for (rindex = 0; rindex < info->nr_rings; rindex++) {
+		rinfo = &info->rinfo[rindex];
+		spin_lock_init(&rinfo->io_lock);
+		INIT_LIST_HEAD(&rinfo->grants);
+		INIT_LIST_HEAD(&rinfo->indirect_pages);
+		rinfo->persistent_gnts_c = 0;
+		rinfo->info = info;
+		INIT_WORK(&rinfo->work, blkif_restart_queue);
+
+		for (i = 0; i < BLK_RING_SIZE; i++)
+			rinfo->shadow[i].req.u.rw.id = i+1;
+		rinfo->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+	}
 
 	/* Front end dir is a number, which is used as the id. */
 	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
@@ -1485,7 +1526,7 @@ static void split_bio_end(struct bio *bio, int error)
 
 static int blkif_recover(struct blkfront_info *info)
 {
-	int i;
+	int i, rindex;
 	struct request *req, *n;
 	struct blk_shadow *copy;
 	int rc;
@@ -1495,64 +1536,70 @@ static int blkif_recover(struct blkfront_info *info)
 	int pending, size;
 	struct split_bio *split_bio;
 	struct list_head requests;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
-
-	/* Stage 1: Make a safe copy of the shadow state. */
-	copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
-		       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
-	if (!copy)
-		return -ENOMEM;
-
-	/* Stage 2: Set up free list. */
-	memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
-	for (i = 0; i < BLK_RING_SIZE; i++)
-		rinfo->shadow[i].req.u.rw.id = i+1;
-	rinfo->shadow_free = rinfo->ring.req_prod_pvt;
-	rinfo->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
-
-	rc = blkfront_setup_indirect(rinfo);
-	if (rc) {
-		kfree(copy);
-		return rc;
-	}
+	struct blkfront_ring_info *rinfo;
 
-	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	segs = blkfront_gather_indirect(info);
 	blk_queue_max_segments(info->rq, segs);
 	bio_list_init(&bio_list);
 	INIT_LIST_HEAD(&requests);
 
-	for (i = 0; i < BLK_RING_SIZE; i++) {
-		/* Not in use? */
-		if (!copy[i].request)
-			continue;
+	for (rindex = 0; rindex < info->nr_rings; rindex++) {
+		rinfo = &info->rinfo[rindex];
+		/* Stage 1: Make a safe copy of the shadow state. */
+		copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
+			       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+		if (!copy)
+			return -ENOMEM;
+
+		/* Stage 2: Set up free list. */
+		memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
+		for (i = 0; i < BLK_RING_SIZE; i++)
+			rinfo->shadow[i].req.u.rw.id = i+1;
+		rinfo->shadow_free = rinfo->ring.req_prod_pvt;
+		rinfo->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+
+		rc = blkfront_setup_indirect(rinfo, segs);
+		if (rc) {
+			kfree(copy);
+			return rc;
+		}
+
+		for (i = 0; i < BLK_RING_SIZE; i++) {
+			/* Not in use? */
+			if (!copy[i].request)
+				continue;
 
-		/*
-		 * Get the bios in the request so we can re-queue them.
-		 */
-		if (copy[i].request->cmd_flags &
-		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
 			/*
-			 * Flush operations don't contain bios, so
-			 * we need to requeue the whole request
+			 * Get the bios in the request so we can re-queue them.
 			 */
-			list_add(&copy[i].request->queuelist, &requests);
-			continue;
+			if (copy[i].request->cmd_flags &
+			    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+				/*
+				 * Flush operations don't contain bios, so
+				 * we need to requeue the whole request
+				 */
+				list_add(&copy[i].request->queuelist, &requests);
+				continue;
+			}
+			merge_bio.head = copy[i].request->bio;
+			merge_bio.tail = copy[i].request->biotail;
+			bio_list_merge(&bio_list, &merge_bio);
+			copy[i].request->bio = NULL;
+			blk_put_request(copy[i].request);
 		}
-		merge_bio.head = copy[i].request->bio;
-		merge_bio.tail = copy[i].request->biotail;
-		bio_list_merge(&bio_list, &merge_bio);
-		copy[i].request->bio = NULL;
-		blk_put_request(copy[i].request);
-	}
-	kfree(copy);
+		kfree(copy);
+	}
 
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	/* Now safe for us to use the shared ring */
 	info->connected = BLKIF_STATE_CONNECTED;
 
-	/* Kick any other new requests queued since we resumed */
-	kick_pending_request_queues(rinfo);
+	for (rindex = 0; rindex < info->nr_rings; rindex++) {
+		rinfo = &info->rinfo[rindex];
+		/* Kick any other new requests queued since we resumed */
+		kick_pending_request_queues(rinfo);
+	}
 
 	list_for_each_entry_safe(req, n, &requests, queuelist) {
 		/* Requeue pending requests (flush or discard) */
@@ -1685,11 +1732,10 @@ static void blkfront_setup_discard(struct blkfront_info *info)
 	info->feature_secdiscard = !!discard_secure;
 }
 
-static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
+static int blkfront_gather_indirect(struct blkfront_info *info)
 {
 	unsigned int indirect_segments, segs;
-	int err, i;
-	struct blkfront_info *info = rinfo->info;
+	int err;
 
 	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 			    "feature-max-indirect-segments", "%u", &indirect_segments,
@@ -1702,6 +1748,13 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
 					  xen_blkif_max_segments);
 		segs = info->max_indirect_segments;
 	}
+	return segs;
+}
+
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo, unsigned int segs)
+{
+	int err, i;
+	struct blkfront_info *info = rinfo->info;
 
 	err = fill_grant_buffer(rinfo, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
 	if (err)
@@ -1774,9 +1827,9 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int physical_sector_size;
 	unsigned int binfo;
-	int err;
+	int err, i;
 	int barrier, flush, discard, persistent;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
+	struct blkfront_ring_info *rinfo;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1874,11 +1927,15 @@ static void blkfront_connect(struct blkfront_info *info)
 	else
 		info->feature_persistent = persistent;
 
-	err = blkfront_setup_indirect(rinfo);
-	if (err) {
-		xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
-				 info->xbdev->otherend);
-		return;
+	for (i = 0; i < info->nr_rings; i++) {
+		rinfo = &info->rinfo[i];
+		err = blkfront_setup_indirect(rinfo, blkfront_gather_indirect(info));
+		if (err) {
+			xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+					 info->xbdev->otherend);
+			blkif_free(info, 0);
+			return;
+		}
 	}
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
@@ -1893,7 +1950,10 @@
 
 	/* Kick pending requests. */
 	info->connected = BLKIF_STATE_CONNECTED;
-	kick_pending_request_queues(rinfo);
+	for (i = 0; i < info->nr_rings; i++) {
+		rinfo = &info->rinfo[i];
+		kick_pending_request_queues(rinfo);
+	}
 
 	add_disk(info->gd);
 
-- 
1.8.3.1