From mboxrd@z Thu Jan  1 00:00:00 1970
From: David Vrabel <david.vrabel@citrix.com>
Subject: Re: [PATCH 12/14] xen-blkback: safely unmap grants in
 case they are still in use
Date: Mon, 26 Jan 2015 15:02:32 +0000
Message-ID: <54C65708.8060304@citrix.com>
References: <1421682692-20628-1-git-send-email-david.vrabel@citrix.com>
	<1421682692-20628-13-git-send-email-david.vrabel@citrix.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Return-path: <xen-devel-bounces@lists.xen.org>
Received: from mail6.bemta14.messagelabs.com ([193.109.254.103])
	by lists.xen.org with esmtp (Exim 4.72)
	(envelope-from <david.vrabel@citrix.com>) id 1YFlCL-0005hO-0l
	for xen-devel@lists.xenproject.org; Mon, 26 Jan 2015 15:03:29 +0000
In-Reply-To: <1421682692-20628-13-git-send-email-david.vrabel@citrix.com>
List-Unsubscribe: <http://lists.xen.org/cgi-bin/mailman/options/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xen.org>
List-Help: <mailto:xen-devel-request@lists.xen.org?subject=help>
List-Subscribe: <http://lists.xen.org/cgi-bin/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=subscribe>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: xen-devel <xen-devel@lists.xenproject.org>, Jens Axboe <axboe@kernel.dk>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>, Jenny Herbert <jennifer.herbert@citrix.com>
List-Id: xen-devel@lists.xenproject.org

On 19/01/15 15:51, David Vrabel wrote:
> From: Jenny Herbert <jennifer.herbert@citrix.com>
> 
> Use gnttab_unmap_refs_async() to wait until the mapped pages are no
> longer in use before unmapping them.
> 
> This allows blkback to use network storage which may retain refs to
> pages in queued skbs after the block I/O has completed.

Hi Jens,

This xen-blkback change depends on several Xen changes and it would be
easiest if this went via the Xen tree.  Are you ok with this?

David

> Signed-off-by: Jenny Herbert <jennifer.herbert@citrix.com>
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> ---
>  drivers/block/xen-blkback/blkback.c |  175 +++++++++++++++++++++++++----------
>  drivers/block/xen-blkback/common.h  |    3 +
>  2 files changed, 127 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index 908e630..39a7aba 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -47,6 +47,7 @@
>  #include <asm/xen/hypervisor.h>
>  #include <asm/xen/hypercall.h>
>  #include <xen/balloon.h>
> +#include <xen/grant_table.h>
>  #include "common.h"
>  
>  /*
> @@ -262,6 +263,17 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
>  	atomic_dec(&blkif->persistent_gnt_in_use);
>  }
>  
> +static void free_persistent_gnts_unmap_callback(int result,
> +						struct gntab_unmap_queue_data *data)
> +{
> +	struct completion* c = data->data;
> +
> +	/* BUG_ON used to reproduce existing behaviour,
> +	   but is this the best way to deal with this? */
> +	BUG_ON(result);
> +	complete(c);
> +}
> +
>  static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
>                                   unsigned int num)
>  {
> @@ -269,8 +281,17 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
>  	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>  	struct persistent_gnt *persistent_gnt;
>  	struct rb_node *n;
> -	int ret = 0;
>  	int segs_to_unmap = 0;
> +	struct gntab_unmap_queue_data unmap_data;
> +	struct completion unmap_completion;
> +
> +	init_completion(&unmap_completion);
> +
> +	unmap_data.data = &unmap_completion;
> +	unmap_data.done = &free_persistent_gnts_unmap_callback;
> +	unmap_data.pages = pages;
> +	unmap_data.unmap_ops = unmap;
> +	unmap_data.kunmap_ops = NULL;
>  
>  	foreach_grant_safe(persistent_gnt, n, root, node) {
>  		BUG_ON(persistent_gnt->handle ==
> @@ -285,9 +306,11 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
>  
>  		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
>  			!rb_next(&persistent_gnt->node)) {
> -			ret = gnttab_unmap_refs(unmap, NULL, pages,
> -				segs_to_unmap);
> -			BUG_ON(ret);
> +
> +			unmap_data.count = segs_to_unmap;
> +			gnttab_unmap_refs_async(&unmap_data);
> +			wait_for_completion(&unmap_completion);
> +
>  			put_free_pages(blkif, pages, segs_to_unmap);
>  			segs_to_unmap = 0;
>  		}
> @@ -653,18 +676,14 @@ void xen_blkbk_free_caches(struct xen_blkif *blkif)
>  	shrink_free_pagepool(blkif, 0 /* All */);
>  }
>  
> -/*
> - * Unmap the grant references, and also remove the M2P over-rides
> - * used in the 'pending_req'.
> - */
> -static void xen_blkbk_unmap(struct xen_blkif *blkif,
> -                            struct grant_page *pages[],
> -                            int num)
> +static unsigned int xen_blkbk_unmap_prepare(
> +	struct xen_blkif *blkif,
> +	struct grant_page **pages,
> +	unsigned int num,
> +	struct gnttab_unmap_grant_ref *unmap_ops,
> +	struct page **unmap_pages)
>  {
> -	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> -	struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>  	unsigned int i, invcount = 0;
> -	int ret;
>  
>  	for (i = 0; i < num; i++) {
>  		if (pages[i]->persistent_gnt != NULL) {
> @@ -674,21 +693,99 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
>  		if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
>  			continue;
>  		unmap_pages[invcount] = pages[i]->page;
> -		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
> +		gnttab_set_unmap_op(&unmap_ops[invcount], vaddr(pages[invcount]->page),
>  				    GNTMAP_host_map, pages[i]->handle);
>  		pages[i]->handle = BLKBACK_INVALID_HANDLE;
> -		if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
> -			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
> -			                        invcount);
> +		invcount++;
> +       }
> +
> +       return invcount;
> +}
> +
> +static void xen_blkbk_unmap_done(struct xen_blkif *blkif,
> +				 struct page *unmap_pages[],
> +				 unsigned int num)
> +{
> +	put_free_pages(blkif, unmap_pages, num);
> +}
> +
> +static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
> +{
> +	struct pending_req* pending_req = (struct pending_req*) (data->data);
> +	struct xen_blkif *blkif = pending_req->blkif;
> +
> +	/* BUG_ON used to reproduce existing behaviour,
> +	   but is this the best way to deal with this? */
> +	BUG_ON(result);
> +
> +	xen_blkbk_unmap_done(blkif, data->pages, data->count);
> +	make_response(blkif, pending_req->id,
> +		      pending_req->operation, pending_req->status);
> +	free_req(blkif, pending_req);
> +	/*
> +	 * Make sure the request is freed before releasing blkif,
> +	 * or there could be a race between free_req and the
> +	 * cleanup done in xen_blkif_free during shutdown.
> +	 *
> +	 * NB: The fact that we might try to wake up pending_free_wq
> +	 * before drain_complete (in case there's a drain going on)
> +	 * it's not a problem with our current implementation
> +	 * because we can assure there's no thread waiting on
> +	 * pending_free_wq if there's a drain going on, but it has
> +	 * to be taken into account if the current model is changed.
> +	 */
> +	if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
> +		complete(&blkif->drain_complete);
> +	}
> +	xen_blkif_put(blkif);
> +}
> +
> +static void xen_blkbk_unmap_and_respond(struct pending_req *req)
> +{
> +	struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data;
> +	struct xen_blkif *blkif = req->blkif;
> +	struct grant_page **pages = req->segments;
> +	unsigned int invcount;
> +
> +	invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
> +					   req->unmap, req->unmap_pages);
> +
> +	work->data = req;
> +	work->done = xen_blkbk_unmap_and_respond_callback;
> +	work->unmap_ops = req->unmap;
> +	work->kunmap_ops = NULL;
> +	work->pages = req->unmap_pages;
> +	work->count = invcount;
> +
> +	gnttab_unmap_refs_async(&req->gnttab_unmap_data);
> +}
> +
> +
> +/*
> + * Unmap the grant references, and also remove the M2P over-rides
> + * used in the 'pending_req'.
> + */
> +static void xen_blkbk_unmap(struct xen_blkif *blkif,
> +                            struct grant_page *pages[],
> +                            int num)
> +{
> +	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +	struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +	unsigned int invcount = 0;
> +	int ret;
> +
> +	while (num) {
> +		unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
> +		
> +		invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
> +						   unmap, unmap_pages);
> +		if (invcount) {
> +			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
>  			BUG_ON(ret);
> -			put_free_pages(blkif, unmap_pages, invcount);
> -			invcount = 0;
> +			xen_blkbk_unmap_done(blkif, unmap_pages, invcount);
>  		}
> -	}
> -	if (invcount) {
> -		ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
> -		BUG_ON(ret);
> -		put_free_pages(blkif, unmap_pages, invcount);
> +		pages += batch;
> +		num -= batch;
>  	}
>  }
>  
> @@ -982,32 +1079,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
>  	 * the grant references associated with 'request' and provide
>  	 * the proper response on the ring.
>  	 */
> -	if (atomic_dec_and_test(&pending_req->pendcnt)) {
> -		struct xen_blkif *blkif = pending_req->blkif;
> -
> -		xen_blkbk_unmap(blkif,
> -		                pending_req->segments,
> -		                pending_req->nr_pages);
> -		make_response(blkif, pending_req->id,
> -			      pending_req->operation, pending_req->status);
> -		free_req(blkif, pending_req);
> -		/*
> -		 * Make sure the request is freed before releasing blkif,
> -		 * or there could be a race between free_req and the
> -		 * cleanup done in xen_blkif_free during shutdown.
> -		 *
> -		 * NB: The fact that we might try to wake up pending_free_wq
> -		 * before drain_complete (in case there's a drain going on)
> -		 * it's not a problem with our current implementation
> -		 * because we can assure there's no thread waiting on
> -		 * pending_free_wq if there's a drain going on, but it has
> -		 * to be taken into account if the current model is changed.
> -		 */
> -		if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
> -			complete(&blkif->drain_complete);
> -		}
> -		xen_blkif_put(blkif);
> -	}
> +	if (atomic_dec_and_test(&pending_req->pendcnt))
> +		xen_blkbk_unmap_and_respond(pending_req);
>  }
>  
>  /*
> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
> index f65b807..428a34d 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -350,6 +350,9 @@ struct pending_req {
>  	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
>  	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
>  	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
> +	struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
> +	struct page *unmap_pages[MAX_INDIRECT_SEGMENTS];
> +	struct gntab_unmap_queue_data gnttab_unmap_data;
>  };
>  
>  
>