From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753659AbbJGKlR (ORCPT ); Wed, 7 Oct 2015 06:41:17 -0400 Received: from aserp1040.oracle.com ([141.146.126.69]:21172 "EHLO aserp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750952AbbJGKlP (ORCPT ); Wed, 7 Oct 2015 06:41:15 -0400 Message-ID: <5614F67C.3060003@oracle.com> Date: Wed, 07 Oct 2015 18:39:56 +0800 From: Bob Liu User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130308 Thunderbird/17.0.4 MIME-Version: 1.0 To: =?windows-1252?Q?Roger_Pau_Monn=E9?= CC: xen-devel@lists.xen.org, david.vrabel@citrix.com, linux-kernel@vger.kernel.org, konrad.wilk@oracle.com, felipe.franciosi@citrix.com, axboe@fb.com, hch@infradead.org, avanzini.arianna@gmail.com, rafal.mielniczuk@citrix.com, boris.ostrovsky@oracle.com, jonathan.davies@citrix.com Subject: Re: [PATCH v3 6/9] xen/blkfront: negotiate the number of hw queues/rings with backend References: <1441456782-31318-1-git-send-email-bob.liu@oracle.com> <1441456782-31318-7-git-send-email-bob.liu@oracle.com> <56128BC0.1090109@citrix.com> In-Reply-To: <56128BC0.1090109@citrix.com> Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 8bit X-Source-IP: userv0022.oracle.com [156.151.31.74] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 10/05/2015 10:40 PM, Roger Pau Monné wrote: > El 05/09/15 a les 14.39, Bob Liu ha escrit: >> The max number of hardware queues for xen/blkfront is set by parameter >> 'max_queues', while the number xen/blkback supported is notified through >> xenstore("multi-queue-max-queues"). >> >> The negotiated number was the smaller one, and was written back to >> xen/blkback as "multi-queue-num-queues". > > I would write this in present instead of past, I think it's clearer: > > The negotiated number _is_ the smaller... Will update. > >> >> Signed-off-by: Bob Liu >> --- >> drivers/block/xen-blkfront.c | 142 ++++++++++++++++++++++++++++++++---------- >> 1 file changed, 108 insertions(+), 34 deletions(-) >> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c >> index 1cae76b..1aa66c9 100644 >> --- a/drivers/block/xen-blkfront.c >> +++ b/drivers/block/xen-blkfront.c >> @@ -107,6 +107,10 @@ static unsigned int xen_blkif_max_ring_order; >> module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); >> MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); >> >> +static unsigned int xen_blkif_max_queues; >> +module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO); >> +MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings per virtual disk"); >> + >> #define BLK_RING_SIZE(dinfo) __CONST_RING_SIZE(blkif, PAGE_SIZE * (dinfo)->pages_per_ring) >> #define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) >> /* >> @@ -114,6 +118,7 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the >> * characters are enough. Define to 20 to keep consist with backend. >> */ >> #define RINGREF_NAME_LEN (20) >> +#define QUEUE_NAME_LEN (12) >> >> /* >> * Per-ring info. >> @@ -687,7 +692,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, >> >> memset(&dinfo->tag_set, 0, sizeof(dinfo->tag_set)); >> dinfo->tag_set.ops = &blkfront_mq_ops; >> - dinfo->tag_set.nr_hw_queues = 1; >> + dinfo->tag_set.nr_hw_queues = dinfo->nr_rings; >> dinfo->tag_set.queue_depth = BLK_RING_SIZE(dinfo); >> dinfo->tag_set.numa_node = NUMA_NO_NODE; >> dinfo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; >> @@ -1350,6 +1355,51 @@ fail: >> return err; >> } >> >> +static int write_per_ring_nodes(struct xenbus_transaction xbt, >> + struct blkfront_ring_info *rinfo, const char *dir) >> +{ >> + int err, i; >> + const char *message = NULL; >> + struct blkfront_dev_info *dinfo = rinfo->dinfo; >> + >> + if (dinfo->pages_per_ring == 1) { >> + err = xenbus_printf(xbt, dir, "ring-ref", "%u", rinfo->ring_ref[0]); >> + if (err) { >> + message = "writing ring-ref"; >> + goto abort_transaction; >> + } >> + pr_info("%s: write ring-ref:%d\n", dir, rinfo->ring_ref[0]); >> + } else { >> + for (i = 0; i < dinfo->pages_per_ring; i++) { >> + char ring_ref_name[RINGREF_NAME_LEN]; >> + >> + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); >> + err = xenbus_printf(xbt, dir, ring_ref_name, >> + "%u", rinfo->ring_ref[i]); >> + if (err) { >> + message = "writing ring-ref"; >> + goto abort_transaction; >> + } >> + pr_info("%s: write ring-ref:%d\n", dir, rinfo->ring_ref[i]); >> + } >> + } >> + >> + err = xenbus_printf(xbt, dir, "event-channel", "%u", rinfo->evtchn); >> + if (err) { >> + message = "writing event-channel"; >> + goto abort_transaction; >> + } >> + pr_info("%s: write event-channel:%d\n", dir, rinfo->evtchn); >> + >> + return 0; >> + >> +abort_transaction: >> + xenbus_transaction_end(xbt, 1); > > The transaction is not started inside of the helper, so I would prefer > it to be ended in the same function where it is started. This should > just return the error and the caller should end the transaction. > The difficult part is how to return the right message? >> + if (message) >> + xenbus_dev_fatal(dinfo->xbdev, err, "%s", message); >> + >> + return err; >> +} >> >> /* Common code used when first setting up, and when resuming. */ >> static int talk_to_blkback(struct xenbus_device *dev, >> @@ -1386,45 +1436,51 @@ again: >> goto out; >> } >> >> + if (dinfo->pages_per_ring > 1) { >> + err = xenbus_printf(xbt, dev->nodename, "ring-page-order", "%u", >> + ring_page_order); >> + if (err) { >> + message = "writing ring-page-order"; >> + goto abort_transaction; >> + } >> + } >> + >> + /* We already got the number of queues in _probe */ >> if (dinfo->nr_rings == 1) { >> rinfo = &dinfo->rinfo[0]; >> >> - if (dinfo->pages_per_ring == 1) { >> - err = xenbus_printf(xbt, dev->nodename, >> - "ring-ref", "%u", rinfo->ring_ref[0]); >> - if (err) { >> - message = "writing ring-ref"; >> - goto abort_transaction; >> - } >> - } else { >> - err = xenbus_printf(xbt, dev->nodename, >> - "ring-page-order", "%u", ring_page_order); >> - if (err) { >> - message = "writing ring-page-order"; >> - goto abort_transaction; >> - } >> - >> - for (i = 0; i < dinfo->pages_per_ring; i++) { >> - char ring_ref_name[RINGREF_NAME_LEN]; >> + err = write_per_ring_nodes(xbt, &dinfo->rinfo[0], dev->nodename); >> + if (err) >> + goto out; >> + } else { >> + char *path; >> + size_t pathsize; >> >> - snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); >> - err = xenbus_printf(xbt, dev->nodename, ring_ref_name, >> - "%u", rinfo->ring_ref[i]); >> - if (err) { >> - message = "writing ring-ref"; >> - goto abort_transaction; >> - } >> - } >> - } >> - err = xenbus_printf(xbt, dev->nodename, >> - "event-channel", "%u", rinfo->evtchn); >> + err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues", "%u", >> + dinfo->nr_rings); >> if (err) { >> - message = "writing event-channel"; >> + message = "writing multi-queue-num-queues"; >> goto abort_transaction; >> } >> - } else { >> - /* Not supported at this stage */ >> - goto abort_transaction; >> + >> + pathsize = strlen(dev->nodename) + QUEUE_NAME_LEN; >> + path = kzalloc(pathsize, GFP_KERNEL); > > kmalloc seems better here, since you are memseting it below in order to > reuse it. > >> + if (!path) { >> + err = -ENOMEM; >> + message = "ENOMEM while writing ring references"; >> + goto abort_transaction; >> + } >> + >> + for (i = 0; i < dinfo->nr_rings; i++) { >> + memset(path, 0, pathsize); >> + snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i); >> + err = write_per_ring_nodes(xbt, &dinfo->rinfo[i], path); >> + if (err) { >> + kfree(path); >> + goto out; >> + } >> + } >> + kfree(path); >> } >> >> err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", >> @@ -1480,6 +1536,7 @@ static int blkfront_probe(struct xenbus_device *dev, >> int err, vdevice, r_index; >> struct blkfront_dev_info *dinfo; >> struct blkfront_ring_info *rinfo; >> + unsigned int back_max_queues = 0; >> >> /* FIXME: Use dynamic device id if this is not set. */ >> err = xenbus_scanf(XBT_NIL, dev->nodename, >> @@ -1534,7 +1591,17 @@ static int blkfront_probe(struct xenbus_device *dev, >> dinfo->vdevice = vdevice; >> dinfo->connected = BLKIF_STATE_DISCONNECTED; >> >> - dinfo->nr_rings = 1; >> + /* Check if backend supports multiple queues */ >> + err = xenbus_scanf(XBT_NIL, dinfo->xbdev->otherend, >> + "multi-queue-max-queues", "%u", &back_max_queues); >> + if (err < 0) >> + back_max_queues = 1; >> + >> + dinfo->nr_rings = min(back_max_queues, xen_blkif_max_queues); >> + if (dinfo->nr_rings <= 0) >> + dinfo->nr_rings = 1; >> + pr_info("dinfo->nr_rings:%u, backend support max-queues:%u\n", dinfo->nr_rings, back_max_queues); > ^ pr_debug ^ supports > > And I would also print the number of queues that are going to be used. > Will update all above comments. >> + >> dinfo->rinfo = kzalloc(sizeof(*rinfo) * dinfo->nr_rings, GFP_KERNEL); >> if (!dinfo->rinfo) { >> xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure"); >> @@ -2257,6 +2324,7 @@ static struct xenbus_driver blkfront_driver = { >> static int __init xlblk_init(void) >> { >> int ret; >> + int nr_cpus = num_online_cpus(); >> >> if (!xen_domain()) >> return -ENODEV; >> @@ -2267,6 +2335,12 @@ static int __init xlblk_init(void) >> xen_blkif_max_ring_order = 0; >> } >> >> + if (xen_blkif_max_queues > nr_cpus) { > > Shouldn't there be a default value for xen_blkif_max_queues if the user > hasn't set the parameter on the command line? > Then the default value is 0, multi-queue isn't enabled by default. >> + pr_info("Invalid max_queues (%d), will use default max: %d.\n", >> + xen_blkif_max_queues, nr_cpus); >> + xen_blkif_max_queues = nr_cpus; >> + } >> + >> if (!xen_has_pv_disk_devices()) >> return -ENODEV; >> >> > -- Regards, -Bob