Message-ID: <56188F4A.3060802@oracle.com>
Date: Sat, 10 Oct 2015 12:08:42 +0800
From: Bob Liu
To: Roger Pau Monné
CC: xen-devel@lists.xen.org, david.vrabel@citrix.com, linux-kernel@vger.kernel.org,
    konrad.wilk@oracle.com, felipe.franciosi@citrix.com, axboe@fb.com,
    hch@infradead.org, avanzini.arianna@gmail.com, rafal.mielniczuk@citrix.com,
    boris.ostrovsky@oracle.com, jonathan.davies@citrix.com
Subject: Re: [PATCH v3 7/9] xen/blkback: separate ring information out of struct xen_blkif
References: <1441456782-31318-1-git-send-email-bob.liu@oracle.com>
 <1441456782-31318-8-git-send-email-bob.liu@oracle.com>
 <56128F54.1090907@citrix.com>
In-Reply-To: <56128F54.1090907@citrix.com>

On 10/05/2015 10:55 PM, Roger Pau Monné wrote:
> On 05/09/15 at 14:39, Bob Liu wrote:
>> Split per-ring information out into a new structure, xen_blkif_ring, so that
>> one vbd device can be associated with one or more rings/hardware queues.
>>
>> This patch is a preparation for supporting multiple hardware queues/rings.
>>
>> Signed-off-by: Arianna Avanzini
>> Signed-off-by: Bob Liu
>> ---
>>  drivers/block/xen-blkback/blkback.c | 365 ++++++++++++++++++-----------------
>>  drivers/block/xen-blkback/common.h  |  52 +++--
>>  drivers/block/xen-blkback/xenbus.c  | 130 +++++++------
>>  3 files changed, 295 insertions(+), 252 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
>> index 954c002..fd02240 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -113,71 +113,71 @@ module_param(log_stats, int, 0644);
>>  /* Number of free pages to remove on each call to gnttab_free_pages */
>>  #define NUM_BATCH_FREE_PAGES 10
>>
>> -static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
>> +static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
>>  {
>>  	unsigned long flags;
>>
>> -	spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> -	if (list_empty(&blkif->free_pages)) {
>> -		BUG_ON(blkif->free_pages_num != 0);
>> -		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +	spin_lock_irqsave(&ring->free_pages_lock, flags);
>> +	if (list_empty(&ring->free_pages)) {
>
> I'm afraid the pool of free pages should be per-device, not per-ring.
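
(For reference, a per-device pool would look much like the pre-patch code, just
reached through ring->blkif; the sketch below is purely illustrative and
untested, not part of the patch:)

static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
	/*
	 * Illustrative sketch only: the pool (free_pages, free_pages_num,
	 * free_pages_lock) stays in struct xen_blkif, shared by all rings
	 * and reached via ring->blkif, exactly as in the pre-patch code.
	 */
	struct xen_blkif *blkif = ring->blkif;
	unsigned long flags;

	spin_lock_irqsave(&blkif->free_pages_lock, flags);
	if (list_empty(&blkif->free_pages)) {
		BUG_ON(blkif->free_pages_num != 0);
		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
		return gnttab_alloc_pages(1, page);
	}
	BUG_ON(blkif->free_pages_num == 0);
	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
	list_del(&page[0]->lru);
	blkif->free_pages_num--;
	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);

	return 0;
}

(The cost is that all rings then contend on a single free_pages_lock for page
allocation.)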
>
>> +		BUG_ON(ring->free_pages_num != 0);
>> +		spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  		return gnttab_alloc_pages(1, page);
>>  	}
>> -	BUG_ON(blkif->free_pages_num == 0);
>> -	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
>> +	BUG_ON(ring->free_pages_num == 0);
>> +	page[0] = list_first_entry(&ring->free_pages, struct page, lru);
>>  	list_del(&page[0]->lru);
>> -	blkif->free_pages_num--;
>> -	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +	ring->free_pages_num--;
>> +	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>
>>  	return 0;
>>  }
>>
>> -static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
>> +static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
>>                                    int num)
>>  {
>>  	unsigned long flags;
>>  	int i;
>>
>> -	spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> +	spin_lock_irqsave(&ring->free_pages_lock, flags);
>>  	for (i = 0; i < num; i++)
>> -		list_add(&page[i]->lru, &blkif->free_pages);
>> -	blkif->free_pages_num += num;
>> -	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +		list_add(&page[i]->lru, &ring->free_pages);
>> +	ring->free_pages_num += num;
>> +	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  }
>>
>> -static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
>> +static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
>>  {
>>  	/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
>>  	struct page *page[NUM_BATCH_FREE_PAGES];
>>  	unsigned int num_pages = 0;
>>  	unsigned long flags;
>>
>> -	spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> -	while (blkif->free_pages_num > num) {
>> -		BUG_ON(list_empty(&blkif->free_pages));
>> -		page[num_pages] = list_first_entry(&blkif->free_pages,
>> +	spin_lock_irqsave(&ring->free_pages_lock, flags);
>> +	while (ring->free_pages_num > num) {
>> +		BUG_ON(list_empty(&ring->free_pages));
>> +		page[num_pages] = list_first_entry(&ring->free_pages,
>>  		                                   struct page, lru);
>>  		list_del(&page[num_pages]->lru);
>> -		blkif->free_pages_num--;
>> +		ring->free_pages_num--;
>>  		if (++num_pages == NUM_BATCH_FREE_PAGES) {
>> -			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +			spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  			gnttab_free_pages(num_pages, page);
>> -			spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> +			spin_lock_irqsave(&ring->free_pages_lock, flags);
>>  			num_pages = 0;
>>  		}
>>  	}
>> -	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  	if (num_pages != 0)
>>  		gnttab_free_pages(num_pages, page);
>>  }
>>
>>  #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
>>
>> -static int do_block_io_op(struct xen_blkif *blkif);
>> -static int dispatch_rw_block_io(struct xen_blkif *blkif,
>> +static int do_block_io_op(struct xen_blkif_ring *ring);
>> +static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
>>  				struct blkif_request *req,
>>  				struct pending_req *pending_req);
>> -static void make_response(struct xen_blkif *blkif, u64 id,
>> +static void make_response(struct xen_blkif_ring *ring, u64 id,
>>  			  unsigned short op, int st);
>>
>>  #define foreach_grant_safe(pos, n, rbtree, node) \
>> @@ -198,19 +198,19 @@ static void make_response(struct xen_blkif *blkif, u64 id,
>>   * bit operations to modify the flags of a persistent grant and to count
>>   * the number of used grants.
>>   */
>> -static int add_persistent_gnt(struct xen_blkif *blkif,
>> +static int add_persistent_gnt(struct xen_blkif_ring *ring,
>>                                struct persistent_gnt *persistent_gnt)
>>  {
>>  	struct rb_node **new = NULL, *parent = NULL;
>>  	struct persistent_gnt *this;
>>
>> -	if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
>> -		if (!blkif->vbd.overflow_max_grants)
>> -			blkif->vbd.overflow_max_grants = 1;
>> +	if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
>> +		if (!ring->blkif->vbd.overflow_max_grants)
>> +			ring->blkif->vbd.overflow_max_grants = 1;
>
> The same goes for the pool of persistent grants: it should be per-device,
> not per-ring.
>
> And I think this issue is far worse than the others, because a frontend
> might use a persistent grant on different queues, forcing the backend to
> map the grant several times, once for each queue. That is not acceptable IMO.
>

Hi Roger,

I realize it would complicate things to make the persistent grant pool
per-device instead of per-queue: extra locks would be required to protect
the per-device pool in both blkfront and blkback.

AFAIR, there was a discussion a while ago about dropping persistent grant
mapping altogether; the only reason the feature was kept is backward
compatibility. So I don't think we should complicate the xen-block code
any further for a feature that is going to be dropped.

How about disabling feature-persistent whenever multi-queue is used?
Rough sketches of both points are below.
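
To make the locking point concrete, here is an untested sketch (purely
illustrative; pers_gnts_lock is a made-up field name, and the "..." marks
the existing rb-tree insertion code, unchanged except for operating on the
per-device tree):

struct xen_blkif {
	/* ... existing fields ... */
	spinlock_t	pers_gnts_lock;		/* hypothetical: guards the shared tree */
	struct rb_root	persistent_gnts;	/* moved back from the ring */
	unsigned int	persistent_gnt_c;
};

static int add_persistent_gnt(struct xen_blkif_ring *ring,
			      struct persistent_gnt *persistent_gnt)
{
	struct xen_blkif *blkif = ring->blkif;

	/* Every ring now serialises on a device-wide lock for each insert. */
	spin_lock(&blkif->pers_gnts_lock);
	if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
		if (!blkif->vbd.overflow_max_grants)
			blkif->vbd.overflow_max_grants = 1;
		spin_unlock(&blkif->pers_gnts_lock);
		return -EBUSY;
	}
	/* ... rb-tree insertion on blkif->persistent_gnts as in the current code ... */
	blkif->persistent_gnt_c++;
	spin_unlock(&blkif->pers_gnts_lock);
	return 0;
}

blkfront would need something similar around its per-device list of
persistent grants.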
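
And the negotiation side could be roughly the following in blkback's
connect() (again only a sketch; nr_rings stands in for whatever field the
multi-queue series ends up introducing):

/*
 * Sketch only: advertise persistent grants only when a single ring is in
 * use.  "feature-persistent" is the key blkback already writes in
 * connect(); "nr_rings" is a placeholder for the multi-queue ring count.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xenbus_transaction xbt;
	int err;

	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
			    be->blkif->nr_rings > 1 ? 0 : 1);
	if (err)
		xenbus_dev_fatal(dev, err, "writing feature-persistent");

	xenbus_transaction_end(xbt, 0);
}

--
Regards,
-Bob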