All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@novell.com>
To: xen-devel@lists.xensource.com, Dong Yang Li <lidongyang@novell.com>
Cc: owen.smith@citrix.com
Subject: Re: <missing subject #3>
Date: Thu, 18 Aug 2011 11:33:44 +0100	[thread overview]
Message-ID: <4E4D06A80200007800051CD6@nat28.tlf.novell.com> (raw)
In-Reply-To: <8688c4e4.428@victor.provo.novell.com>

	>>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@novell.com> wrote:
> JBeulich@novell.com 
> Subject: [PATCH V2 3/3] xen-blkback: handle trim request in backend driver
> Date: Thu, 18 Aug 2011 17:34:31 +0800
> Message-Id: <1313660071-25230-4-git-send-email-lidongyang@novell.com>
> X-Mailer: git-send-email 1.7.6
> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
> References: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
> 
> Now blkback driver can handle the trim request from guest, we will
> forward the request to phy device if it really has trim support, or we'll
> punch a hole on the image file.
> 
> Signed-off-by: Li Dongyang <lidongyang@novell.com>
> ---
>  drivers/block/xen-blkback/blkback.c |   85 +++++++++++++++++++++++++++++------
>  drivers/block/xen-blkback/common.h  |    4 +-
>  drivers/block/xen-blkback/xenbus.c  |   61 +++++++++++++++++++++++++
>  3 files changed, 135 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index 2330a9a..5acc37a 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -39,6 +39,9 @@
>  #include <linux/list.h>
>  #include <linux/delay.h>
>  #include <linux/freezer.h>
> +#include <linux/loop.h>
> +#include <linux/falloc.h>
> +#include <linux/fs.h>
>  
>  #include <xen/events.h>
>  #include <xen/page.h>
> @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
>  
>  static void print_stats(struct xen_blkif *blkif)
>  {
> -	pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
> +	pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
> +		 "  |  tr %4d\n",
>  		 current->comm, blkif->st_oo_req,
> -		 blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
> +		 blkif->st_rd_req, blkif->st_wr_req,
> +		 blkif->st_f_req, blkif->st_tr_req);
>  	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
>  	blkif->st_rd_req = 0;
>  	blkif->st_wr_req = 0;
>  	blkif->st_oo_req = 0;
> +	blkif->st_tr_req = 0;
>  }
>  
>  int xen_blkif_schedule(void *arg)
> @@ -563,6 +569,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>  		blkif->st_f_req++;
>  		operation = WRITE_FLUSH;
>  		break;
> +	case BLKIF_OP_TRIM:
> +		blkif->st_tr_req++;
> +		operation = REQ_DISCARD;
> +		break;
>  	case BLKIF_OP_WRITE_BARRIER:
>  	default:
>  		operation = 0; /* make gcc happy */
> @@ -572,7 +582,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>  
>  	/* Check that the number of segments is sane. */
>  	nseg = req->nr_segments;
> -	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
> +	if (unlikely(nseg == 0 && operation != (WRITE_FLUSH | REQ_DISCARD)) ||

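This will match neither WRITE_FLUSH nor REQ_DISCARD: operation is being
compared against the bitwise OR of the two values, which equals neither
of them, so the != is always true and every request with nseg == 0 gets
rejected. Each value needs to be compared separately, i.e.
operation != WRITE_FLUSH && operation != REQ_DISCARD.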

>  	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
>  		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
>  			 nseg);
> @@ -627,10 +637,13 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>  	 * the hypercall to unmap the grants - that is all done in
>  	 * xen_blkbk_unmap.
>  	 */
> -	if (xen_blkbk_map(req, pending_req, seg))
> +	if (operation != BLKIF_OP_TRIM && xen_blkbk_map(req, pending_req, seg))
>  		goto fail_flush;
>  
> -	/* This corresponding xen_blkif_put is done in __end_block_io_op */
> +	/*
> +	 * This corresponding xen_blkif_put is done in __end_block_io_op, or
> +	 * below if we are handling a BLKIF_OP_TRIM.
> +	 */
>  	xen_blkif_get(blkif);
>  
>  	for (i = 0; i < nseg; i++) {
> @@ -654,18 +667,62 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>  		preq.sector_number += seg[i].nsec;
>  	}
>  
> -	/* This will be hit if the operation was a flush. */
> +	/* This will be hit if the operation was a flush or trim. */
>  	if (!bio) {
> -		BUG_ON(operation != WRITE_FLUSH);
> +		BUG_ON(operation != (WRITE_FLUSH | REQ_DISCARD));

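Same here - compared against the OR of both values, the BUG_ON() will
trigger for flush as well as for trim requests.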

>  
> -		bio = bio_alloc(GFP_KERNEL, 0);
> -		if (unlikely(bio == NULL))
> -			goto fail_put_bio;
> +		if (operation == WRITE_FLUSH) {
> +			bio = bio_alloc(GFP_KERNEL, 0);
> +			if (unlikely(bio == NULL))
> +				goto fail_put_bio;
>  
> -		biolist[nbio++] = bio;
> -		bio->bi_bdev    = preq.bdev;
> -		bio->bi_private = pending_req;
> -		bio->bi_end_io  = end_block_io_op;
> +			biolist[nbio++] = bio;
> +			bio->bi_bdev    = preq.bdev;
> +			bio->bi_private = pending_req;
> +			bio->bi_end_io  = end_block_io_op;
> +		} else if (operation == REQ_DISCARD) {
> +			int err = 0;
> +			int status = BLKIF_RSP_OKAY;
> +			struct block_device *bdev = blkif->vbd.bdev;
> +
> +			preq.nr_sects = req->u.trim.nr_sectors;
> +			if (blkif->vbd.type & VDISK_PHY_BACKEND)
> +				/* just forward the trim request */
> +				err = blkdev_issue_discard(bdev,
> +						preq.sector_number,
> +						preq.nr_sects,
> +						GFP_KERNEL, 0);
> +			else if (blkif->vbd.type & VDISK_FILE_BACKEND) {
> +				/* punch a hole in the backing file */
> +				struct loop_device *lo =
> +					bdev->bd_disk->private_data;
> +				struct file *file = lo->lo_backing_file;
> +
> +				if (file->f_op->fallocate)
> +					err = file->f_op->fallocate(file,
> +						FALLOC_FL_KEEP_SIZE |
> +						FALLOC_FL_PUNCH_HOLE,
> +						preq.sector_number << 9,
> +						preq.nr_sects << 9);
> +				else
> +					err = -EOPNOTSUPP;
> +			} else

Are you not worried about doing this synchronously, i.e. blocking any
other I/O going on for the device?

> +				status = BLKIF_RSP_EOPNOTSUPP;
> +
> +			if (err == -EOPNOTSUPP) {
> +				DPRINTK("blkback: discard op failed, "
> +						"not supported\n");
> +				status = BLKIF_RSP_EOPNOTSUPP;
> +			} else if (err)
> +				status = BLKIF_RSP_ERROR;
> +
> +			if (status == BLKIF_RSP_OKAY)
> +				blkif->st_tr_sect += preq.nr_sects;

I don't think this is a particularly useful statistic.

> +			make_response(blkif, req->id, req->operation, status);
> +			xen_blkif_put(blkif);
> +			free_req(pending_req);
> +			return 0;
> +		}
>  	}
>  
>  	/*
> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
> index 9e40b28..1fef727 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -159,8 +159,10 @@ struct xen_blkif {
>  	int			st_wr_req;
>  	int			st_oo_req;
>  	int			st_f_req;
> +	int			st_tr_req;
>  	int			st_rd_sect;
>  	int			st_wr_sect;
> +	int			st_tr_sect;
>  
>  	wait_queue_head_t	waiting_to_free;
>  
> @@ -182,7 +184,7 @@ struct xen_blkif {
>  
>  struct phys_req {
>  	unsigned short		dev;
> -	unsigned short		nr_sects;
> +	blkif_sector_t		nr_sects;
>  	struct block_device	*bdev;
>  	blkif_sector_t		sector_number;
>  };
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index 3f129b4..05ea8e0 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -272,16 +272,20 @@ VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
>  VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
>  VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
>  VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
> +VBD_SHOW(tr_req, "%d\n", be->blkif->st_tr_req);
>  VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
>  VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
> +VBD_SHOW(tr_sect, "%d\n", be->blkif->st_tr_sect);
>  
>  static struct attribute *xen_vbdstat_attrs[] = {
>  	&dev_attr_oo_req.attr,
>  	&dev_attr_rd_req.attr,
>  	&dev_attr_wr_req.attr,
>  	&dev_attr_f_req.attr,
> +	&dev_attr_tr_req.attr,
>  	&dev_attr_rd_sect.attr,
>  	&dev_attr_wr_sect.attr,
> +	&dev_attr_tr_sect.attr,
>  	NULL
>  };
>  
> @@ -419,6 +423,59 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
>  	return err;
>  }
>  
> +int xen_blkbk_trim(struct xenbus_transaction xbt, struct backend_info *be)
> +{
> +	struct xenbus_device *dev = be->dev;
> +	struct xen_vbd *vbd = &be->blkif->vbd;
> +	char *type;
> +	int err;
> +	int state = 0;
> +
> +	type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
> +	if (!IS_ERR(type)) {
> +		if (strcmp(type, "file") == 0)
> +			state = 1;
> +			vbd->type |= VDISK_FILE_BACKEND;

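Missing { and } around the two statements above - as written, only
"state = 1" is conditional on the strcmp(), while VDISK_FILE_BACKEND
gets set whenever the type could be read, regardless of its value.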

Jan

> +		if (strcmp(type, "phy") == 0) {
> +			struct block_device *bdev = be->blkif->vbd.bdev;
> +			struct request_queue *q = bdev_get_queue(bdev);
> +			if (blk_queue_discard(q)) {
> +				err = xenbus_printf(xbt, dev->nodename,
> +					"discard_granularity", "%u",
> +					q->limits.discard_granularity);
> +				if (err) {
> +					xenbus_dev_fatal(dev, err,
> +						"writing discard_granularity");
> +					goto kfree;
> +				}
> +				err = xenbus_printf(xbt, dev->nodename,
> +					"discard_alignment", "%u",
> +					q->limits.discard_alignment);
> +				if (err) {
> +					xenbus_dev_fatal(dev, err,
> +						"writing discard_alignment");
> +					goto kfree;
> +				}
> +				state = 1;
> +				vbd->type |= VDISK_PHY_BACKEND;
> +			}
> +		}
> +	} else {
> +		err = PTR_ERR(type);
> +		xenbus_dev_fatal(dev, err, "reading type");
> +		goto out;
> +	}
> +
> +	err = xenbus_printf(xbt, dev->nodename, "feature-trim",
> +			    "%d", state);
> +	if (err)
> +		xenbus_dev_fatal(dev, err, "writing feature-trim");
> +kfree:
> +	kfree(type);
> +out:
> +	return err;
> +}
> +
>  /*
>   * Entry point to this code when a new device is created.  Allocate the basic
>   * structures, and watch the store waiting for the hotplug scripts to tell us
> @@ -650,6 +707,10 @@ again:
>  	if (err)
>  		goto abort;
>  
> +	err = xen_blkbk_trim(xbt, be);
> +	if (err)
> +		goto abort;
> +
>  	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
>  			    (unsigned long long)vbd_sz(&be->blkif->vbd));
>  	if (err) {

       reply	other threads:[~2011-08-18 10:33 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <8688c4e4.428@victor.provo.novell.com>
2011-08-18 10:33 ` Jan Beulich [this message]
     [not found] ` <4E4D06A80200007800051CD6@victor.provo.novell.com>
2011-08-22  9:24   ` <missing subject #3> Li Dongyang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E4D06A80200007800051CD6@nat28.tlf.novell.com \
    --to=jbeulich@novell.com \
    --cc=lidongyang@novell.com \
    --cc=owen.smith@citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.