From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S965757AbbKEDCo (ORCPT <rfc822;w@1wt.eu>);
	Wed, 4 Nov 2015 22:02:44 -0500
Received: from aserp1040.oracle.com ([141.146.126.69]:31755 "EHLO
	aserp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S964910AbbKEDCm (ORCPT
	<rfc822;linux-kernel@vger.kernel.org>);
	Wed, 4 Nov 2015 22:02:42 -0500
Message-ID: <563AC6C2.9080909@oracle.com>
Date: Thu, 05 Nov 2015 11:02:26 +0800
From: Bob Liu <bob.liu@oracle.com>
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130308 Thunderbird/17.0.4
MIME-Version: 1.0
To: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: xen-devel@lists.xen.org, linux-kernel@vger.kernel.org,
        roger.pau@citrix.com, felipe.franciosi@citrix.com, axboe@fb.com,
        avanzini.arianna@gmail.com, rafal.mielniczuk@citrix.com,
        jonathan.davies@citrix.com, david.vrabel@citrix.com
Subject: Re: [PATCH v4 07/10] xen/blkback: pseudo support for multi hardware
 queues/rings
References: <1446438106-20171-1-git-send-email-bob.liu@oracle.com> <1446438106-20171-8-git-send-email-bob.liu@oracle.com> <20151105023004.GA3949@x230.dumpdata.com>
In-Reply-To: <20151105023004.GA3949@x230.dumpdata.com>
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit
X-Source-IP: userv0022.oracle.com [156.151.31.74]
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org


On 11/05/2015 10:30 AM, Konrad Rzeszutek Wilk wrote:
> On Mon, Nov 02, 2015 at 12:21:43PM +0800, Bob Liu wrote:
>> Preparatory patch for multiple hardware queues (rings). The number of
>> rings is unconditionally set to 1, larger number will be enabled in next
>> patch so as to make every single patch small and readable.
> 
> Instead of saying 'next patch' - spell out the title of the patch.
> 
> 
>>
>> Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> ---
>>  drivers/block/xen-blkback/common.h |   3 +-
>>  drivers/block/xen-blkback/xenbus.c | 292 +++++++++++++++++++++++--------------
>>  2 files changed, 185 insertions(+), 110 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
>> index f0dd69a..4de1326 100644
>> --- a/drivers/block/xen-blkback/common.h
>> +++ b/drivers/block/xen-blkback/common.h
>> @@ -341,7 +341,8 @@ struct xen_blkif {
>>  	struct work_struct	free_work;
>>  	unsigned int nr_ring_pages;
>>  	/* All rings for this device */
>> -	struct xen_blkif_ring ring;
>> +	struct xen_blkif_ring *rings;
>> +	unsigned int nr_rings;
>>  };
>>  
>>  struct seg_buf {
>> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
>> index 7bdd5fd..ac4b458 100644
>> --- a/drivers/block/xen-blkback/xenbus.c
>> +++ b/drivers/block/xen-blkback/xenbus.c
>> @@ -84,11 +84,12 @@ static int blkback_name(struct xen_blkif *blkif, char *buf)
>>  
>>  static void xen_update_blkif_status(struct xen_blkif *blkif)
>>  {
>> -	int err;
>> +	int err, i;
> 
> unsigned int.
>>  	char name[BLKBACK_NAME_LEN];
>> +	struct xen_blkif_ring *ring;
>>  
>>  	/* Not ready to connect? */
>> -	if (!blkif->ring.irq || !blkif->vbd.bdev)
>> +	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
>>  		return;
>>  
>>  	/* Already connected? */
>> @@ -113,19 +114,57 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
>>  	}
>>  	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
>>  
>> -	blkif->ring.xenblkd = kthread_run(xen_blkif_schedule, &blkif->ring, "%s", name);
>> -	if (IS_ERR(blkif->ring.xenblkd)) {
>> -		err = PTR_ERR(blkif->ring.xenblkd);
>> -		blkif->ring.xenblkd = NULL;
>> -		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>> -		return;
>> +	if (blkif->nr_rings == 1) {
>> +		blkif->rings[0].xenblkd = kthread_run(xen_blkif_schedule, &blkif->rings[0], "%s", name);
>> +		if (IS_ERR(blkif->rings[0].xenblkd)) {
>> +			err = PTR_ERR(blkif->rings[0].xenblkd);
>> +			blkif->rings[0].xenblkd = NULL;
>> +			xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>> +			return;
>> +		}
>> +	} else {
>> +		for (i = 0; i < blkif->nr_rings; i++) {
>> +			ring = &blkif->rings[i];
>> +			ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
>> +			if (IS_ERR(ring->xenblkd)) {
>> +				err = PTR_ERR(ring->xenblkd);
>> +				ring->xenblkd = NULL;
>> +				xenbus_dev_error(blkif->be->dev, err,
>> +						"start %s-%d xenblkd", name, i);
>> +				return;
>> +			}
>> +		}
> 
> Please collapse this together and just have one loop.
> 
> And just use the same name throughout ('%s-%d')
> 
> Also this does not take care of actually freeing the allocated
> ring->xenblkd if one of them fails to start.
> 
> Hmm, looking at this function .. we seem to forget to change the
> state if something fails.
> 
> As in, connect switches the state to Connected.. And if anything blows
> up after the connect() we don't switch the state. We do report an error
> in the XenBus, but the state is the same.
> 
> We should be using xenbus_dev_fatal I believe - so at least the state
> changes to Closing (and then the frontend can switch itself to
> Closing->Closed) - and we will call 'xen_blkif_disconnect' on Closed. 
> 

Will update..

>> +	}
>> +}
>> +
>> +static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
>> +{
>> +	int r;
> 
> unsigned int i;
> 
>> +
>> +	blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct xen_blkif_ring), GFP_KERNEL);
>> +	if (!blkif->rings)
>> +		return -ENOMEM;
>> +
>> +	for (r = 0; r < blkif->nr_rings; r++) {
>> +		struct xen_blkif_ring *ring = &blkif->rings[r];
>> +
>> +		spin_lock_init(&ring->blk_ring_lock);
>> +		init_waitqueue_head(&ring->wq);
>> +		INIT_LIST_HEAD(&ring->pending_free);
>> +
>> +		spin_lock_init(&ring->pending_free_lock);
>> +		init_waitqueue_head(&ring->pending_free_wq);
>> +		init_waitqueue_head(&ring->shutdown_wq);
>> +		ring->blkif = blkif;
>> +		xen_blkif_get(blkif);
>>  	}
>> +
>> +	return 0;
>>  }
>>  
>>  static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>>  {
>>  	struct xen_blkif *blkif;
>> -	struct xen_blkif_ring *ring;
>>  
>>  	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
>>  
>> @@ -136,27 +175,17 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>>  	blkif->domid = domid;
>>  	atomic_set(&blkif->refcnt, 1);
>>  	init_completion(&blkif->drain_complete);
>> -	atomic_set(&blkif->drain, 0);
>>  	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
>>  	spin_lock_init(&blkif->free_pages_lock);
>>  	INIT_LIST_HEAD(&blkif->free_pages);
>> -	blkif->free_pages_num = 0;
>> -	blkif->persistent_gnts.rb_node = NULL;
>>  	INIT_LIST_HEAD(&blkif->persistent_purge_list);
>> -	atomic_set(&blkif->persistent_gnt_in_use, 0);
>>  	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
>>  
>> -	ring = &blkif->ring;
>> -	ring->blkif = blkif;
>> -	spin_lock_init(&ring->blk_ring_lock);
>> -	init_waitqueue_head(&ring->wq);
>> -	ring->st_print = jiffies;
>> -	atomic_set(&ring->inflight, 0);
>> -
>> -	INIT_LIST_HEAD(&ring->pending_free);
>> -	spin_lock_init(&ring->pending_free_lock);
>> -	init_waitqueue_head(&ring->pending_free_wq);
>> -	init_waitqueue_head(&ring->shutdown_wq);
>> +	blkif->nr_rings = 1;
>> +	if (xen_blkif_alloc_rings(blkif)) {
>> +		kmem_cache_free(xen_blkif_cachep, blkif);
>> +		return ERR_PTR(-ENOMEM);
>> +	}
>>  
>>  	return blkif;
>>  }
>> @@ -218,50 +247,54 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
>>  static int xen_blkif_disconnect(struct xen_blkif *blkif)
>>  {
>>  	struct pending_req *req, *n;
>> -	int i = 0, j;
>> -	struct xen_blkif_ring *ring = &blkif->ring;
>> +	int j, r;
>>  
> 
> unsigned int i;
>> -	if (ring->xenblkd) {
>> -		kthread_stop(ring->xenblkd);
>> -		wake_up(&ring->shutdown_wq);
>> -		ring->xenblkd = NULL;
>> -	}
>> +	for (r = 0; r < blkif->nr_rings; r++) {
>> +		struct xen_blkif_ring *ring = &blkif->rings[r];
>> +		int i = 0;
> 
> unsigned int
>>  
>> -	/* The above kthread_stop() guarantees that at this point we
>> -	 * don't have any discard_io or other_io requests. So, checking
>> -	 * for inflight IO is enough.
>> -	 */
>> -	if (atomic_read(&ring->inflight) > 0)
>> -		return -EBUSY;
>> +		if (ring->xenblkd) {
>> +			kthread_stop(ring->xenblkd);
>> +			wake_up(&ring->shutdown_wq);
>> +			ring->xenblkd = NULL;
>> +		}
>>  
>> -	if (ring->irq) {
>> -		unbind_from_irqhandler(ring->irq, ring);
>> -		ring->irq = 0;
>> -	}
>> +		/* The above kthread_stop() guarantees that at this point we
>> +		 * don't have any discard_io or other_io requests. So, checking
>> +		 * for inflight IO is enough.
>> +		 */
>> +		if (atomic_read(&ring->inflight) > 0)
>> +			return -EBUSY;
>>  
>> -	if (ring->blk_rings.common.sring) {
>> -		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>> -		ring->blk_rings.common.sring = NULL;
>> -	}
>> +		if (ring->irq) {
>> +			unbind_from_irqhandler(ring->irq, ring);
>> +			ring->irq = 0;
>> +		}
>>  
>> -	/* Remove all persistent grants and the cache of ballooned pages. */
>> -	xen_blkbk_free_caches(ring);
>> +		if (ring->blk_rings.common.sring) {
>> +			xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>> +			ring->blk_rings.common.sring = NULL;
>> +		}
>>  
>> -	/* Check that there is no request in use */
>> -	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
>> -		list_del(&req->free_list);
>> +		/* Remove all persistent grants and the cache of ballooned pages. */
>> +		xen_blkbk_free_caches(ring);
>>  
>> -		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>> -			kfree(req->segments[j]);
>> +		/* Check that there is no request in use */
>> +		list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
>> +			list_del(&req->free_list);
>>  
>> -		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>> -			kfree(req->indirect_pages[j]);
>> +			for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>> +				kfree(req->segments[j]);
>>  
>> -		kfree(req);
>> -		i++;
>> -	}
>> +			for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>> +				kfree(req->indirect_pages[j]);
>>  
>> -	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>> +			kfree(req);
>> +			i++;
>> +		}
>> +
>> +		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>> +	}
>>  	blkif->nr_ring_pages = 0;
>>  
>>  	return 0;
>> @@ -281,6 +314,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
>>  	BUG_ON(!list_empty(&blkif->free_pages));
>>  	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
>>  
>> +	kfree(blkif->rings);
>>  	kmem_cache_free(xen_blkif_cachep, blkif);
>>  }
>>  
>> @@ -307,15 +341,19 @@ int __init xen_blkif_interface_init(void)
>>  		struct xenbus_device *dev = to_xenbus_device(_dev);	\
>>  		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
>>  		struct xen_blkif *blkif = be->blkif;			\
>> -		struct xen_blkif_ring *ring = &blkif->ring;		\
>> +		int i;							\
>> +									\
>> +		for (i = 0; i < blkif->nr_rings; i++) {			\
>> +			struct xen_blkif_ring *ring = &blkif->rings[i];	\
>>  									\
>> -		blkif->st_oo_req = ring->st_oo_req;			\
>> -		blkif->st_rd_req = ring->st_rd_req;			\
>> -		blkif->st_wr_req = ring->st_wr_req;			\
>> -		blkif->st_f_req = ring->st_f_req;			\
>> -		blkif->st_ds_req = ring->st_ds_req;			\
>> -		blkif->st_rd_sect = ring->st_rd_sect;			\
>> -		blkif->st_wr_sect = ring->st_wr_sect;			\
>> +			blkif->st_oo_req += ring->st_oo_req;		\
>> +			blkif->st_rd_req += ring->st_rd_req;		\
>> +			blkif->st_wr_req += ring->st_wr_req;		\
>> +			blkif->st_f_req += ring->st_f_req;		\
>> +			blkif->st_ds_req += ring->st_ds_req;		\
>> +			blkif->st_rd_sect += ring->st_rd_sect;		\
>> +			blkif->st_wr_sect += ring->st_wr_sect;		\
>> +		}							\
> 
> That needs fixing. You could alter the macro to only read the values
> from the proper member.

Do you think we still need these debug values for per-ring?
I'm thinking of just leave them for per-device(blkif), but that requires extra lock to protect?

-- 
Regards,
-Bob