From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: Re: [PATCH V3 2/3] xen-blkfront: teach blkfront driver handle trim request Date: Wed, 24 Aug 2011 11:42:42 +0100 Message-ID: <4E54F1C20200007800052DF8@nat28.tlf.novell.com> References: <1314177825-22360-1-git-send-email-lidongyang@novell.com> <1314177825-22360-3-git-send-email-lidongyang@novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: <1314177825-22360-3-git-send-email-lidongyang@novell.com> Content-Disposition: inline List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: xen-devel@lists.xensource.com, Dong Yang Li Cc: owen.smith@citrix.com List-Id: xen-devel@lists.xenproject.org >>> On 24.08.11 at 11:23, Li Dongyang wrote: > The blkfront driver now will read feature-trim from xenstore, > and set up the request queue with trim params, then we can forward the > discard requests to backend driver. >=20 > Signed-off-by: Li Dongyang > --- > drivers/block/xen-blkfront.c | 111 +++++++++++++++++++++++++++++++++---= ------ > 1 files changed, 88 insertions(+), 23 deletions(-) >=20 > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index 9ea8c25..aa3cede 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -98,6 +98,9 @@ struct blkfront_info > unsigned long shadow_free; > unsigned int feature_flush; > unsigned int flush_op; > + unsigned int feature_trim; > + unsigned int discard_granularity; > + unsigned int discard_alignment; > int is_ready; > }; > =20 > @@ -302,29 +305,36 @@ static int blkif_queue_request(struct request = *req) > ring_req->operation =3D info->flush_op; > } > =20 > - ring_req->nr_segments =3D blk_rq_map_sg(req->q, req, info->sg); > - BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); > + if (unlikely(req->cmd_flags & REQ_DISCARD)) { > + /* id, sector_number and handle are set above. */ > + ring_req->operation =3D BLKIF_OP_TRIM; > + ring_req->nr_segments =3D 0; > + ring_req->u.trim.nr_sectors =3D blk_rq_sectors(req); > + } else { > + ring_req->nr_segments =3D blk_rq_map_sg(req->q, req, = info->sg); > + BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUE= ST); > =20 > - for_each_sg(info->sg, sg, ring_req->nr_segments, i) { > - buffer_mfn =3D pfn_to_mfn(page_to_pfn(sg_page(sg))); > - fsect =3D sg->offset >> 9; > - lsect =3D fsect + (sg->length >> 9) - 1; > - /* install a grant reference. */ > - ref =3D gnttab_claim_grant_reference(&gref_head); > - BUG_ON(ref =3D=3D -ENOSPC); > + for_each_sg(info->sg, sg, ring_req->nr_segments, i) { > + buffer_mfn =3D pfn_to_mfn(page_to_pfn(sg_page(sg)))= ; > + fsect =3D sg->offset >> 9; > + lsect =3D fsect + (sg->length >> 9) - 1; > + /* install a grant reference. */ > + ref =3D gnttab_claim_grant_reference(&gref_head); > + BUG_ON(ref =3D=3D -ENOSPC); > =20 > - gnttab_grant_foreign_access_ref( > - ref, > - info->xbdev->otherend_id, > - buffer_mfn, > - rq_data_dir(req) ); > - > - info->shadow[id].frame[i] =3D mfn_to_pfn(buffer_mfn); > - ring_req->u.rw.seg[i] =3D > - (struct blkif_request_segment) { > - .gref =3D ref, > - .first_sect =3D fsect, > - .last_sect =3D lsect }; > + gnttab_grant_foreign_access_ref( > + ref, > + info->xbdev->otherend_id, > + buffer_mfn, > + rq_data_dir(req)); > + > + info->shadow[id].frame[i] =3D mfn_to_pfn(buffer_mfn= ); > + ring_req->u.rw.seg[i] =3D > + (struct blkif_request_segment) { > + .gref =3D ref, > + .first_sect =3D fsect, > + .last_sect =3D lsect }; > + } > } > =20 > info->ring.req_prod_pvt++; > @@ -399,6 +409,7 @@ wait: > static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > { > struct request_queue *rq; > + struct blkfront_info *info =3D gd->private_data; > =20 > rq =3D blk_init_queue(do_blkif_request, &blkif_io_lock); > if (rq =3D=3D NULL) > @@ -406,6 +417,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, = u16=20 > sector_size) > =20 > queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); > =20 > + if (info->feature_trim) { > + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); > + blk_queue_max_discard_sectors(rq, get_capacity(gd)); > + rq->limits.discard_granularity =3D info->discard_granularit= y; > + rq->limits.discard_alignment =3D info->discard_alignment; Don't you also need to set rq->limits.max_discard_sectors here (since when zero blkdev_issue_discard() doesn't do anything)? And wouldn't that need to be propagated from the backend, too? > + } > + > /* Hard sector size and max sectors impersonate the equiv. = hardware. */ > blk_queue_logical_block_size(rq, sector_size); > blk_queue_max_hw_sectors(rq, 512); > @@ -722,6 +740,19 @@ static irqreturn_t blkif_interrupt(int irq, void=20 > *dev_id) > =20 > error =3D (bret->status =3D=3D BLKIF_RSP_OKAY) ? 0 : -EIO; > switch (bret->operation) { > + case BLKIF_OP_TRIM: > + if (unlikely(bret->status =3D=3D BLKIF_RSP_EOPNOTSU= PP)) { > + struct request_queue *rq =3D info->rq; > + printk(KERN_WARNING "blkfront: %s: trim op = failed\n", > + info->gd->disk_name); > + error =3D -EOPNOTSUPP; > + info->feature_trim =3D 0; > + spin_lock(rq->queue_lock); > + queue_flag_clear(QUEUE_FLAG_DISCARD, rq); > + spin_unlock(rq->queue_lock); > + } > + __blk_end_request_all(req, error); > + break; > case BLKIF_OP_FLUSH_DISKCACHE: > case BLKIF_OP_WRITE_BARRIER: > if (unlikely(bret->status =3D=3D BLKIF_RSP_EOPNOTSU= PP)) { > @@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info) > bdput(bdev); > } > =20 > +static void blkfront_setup_trim(struct blkfront_info *info) > +{ > + int err; > + char *type; > + unsigned int discard_granularity; > + unsigned int discard_alignment; > + > + type =3D xenbus_read(XBT_NIL, info->xbdev->otherend, "type", = NULL); > + if (IS_ERR(type)) > + return; > + > + if (strncmp(type, "phy", 3) =3D=3D 0) { > + err =3D xenbus_gather(XBT_NIL, info->xbdev->otherend, > + "discard-granularity", "%u", &discard_granularity, > + "discard-alignment", "%u", &discard_alignment, > + NULL); Let me repeat my wish to have these nodes start with "trim-" rather than "discard-", so they can be easily associated with the "feature-trim" one. Jan > + if (!err) { > + info->feature_trim =3D 1; > + info->discard_granularity =3D discard_granularity; > + info->discard_alignment =3D discard_alignment; > + } > + } else if (strncmp(type, "file", 4) =3D=3D 0) > + info->feature_trim =3D 1; > + > + kfree(type); > +} > + > /* > * Invoked when the backend is finally 'ready' (and has told produced > * the details about the physical device - #sectors, size, etc). > @@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info= =20 > *info) > unsigned long sector_size; > unsigned int binfo; > int err; > - int barrier, flush; > + int barrier, flush, trim; > =20 > switch (info->connected) { > case BLKIF_STATE_CONNECTED: > @@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info= =20 > *info) > info->feature_flush =3D REQ_FLUSH; > info->flush_op =3D BLKIF_OP_FLUSH_DISKCACHE; > } > - =09 > + > + err =3D xenbus_gather(XBT_NIL, info->xbdev->otherend, > + "feature-trim", "%d", &trim, > + NULL); > + > + if (!err && trim) > + blkfront_setup_trim(info); > + > err =3D xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > if (err) { > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",