From mboxrd@z Thu Jan 1 00:00:00 1970 From: Li Dongyang Subject: Re: [PATCH V3 2/3] xen-blkfront: teach blkfront driver handle trim request Date: Thu, 25 Aug 2011 14:37:32 +0800 Message-ID: References: <1314177825-22360-1-git-send-email-lidongyang@novell.com> <1314177825-22360-3-git-send-email-lidongyang@novell.com> <4E54F1C20200007800052DF8@victor.provo.novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: <4E54F1C20200007800052DF8@victor.provo.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Jan Beulich Cc: xen-devel@lists.xensource.com, owen.smith@citrix.com List-Id: xen-devel@lists.xenproject.org On Wed, Aug 24, 2011 at 6:42 PM, Jan Beulich wrote: >>>> On 24.08.11 at 11:23, Li Dongyang wrote: >> The blkfront driver now will read feature-trim from xenstore, >> and set up the request queue with trim params, then we can forward the >> discard requests to backend driver. >> >> Signed-off-by: Li Dongyang >> --- >> =A0drivers/block/xen-blkfront.c | =A0111 +++++++++++++++++++++++++++++++= ++--------- >> =A01 files changed, 88 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c >> index 9ea8c25..aa3cede 100644 >> --- a/drivers/block/xen-blkfront.c >> +++ b/drivers/block/xen-blkfront.c >> @@ -98,6 +98,9 @@ struct blkfront_info >> =A0 =A0 =A0 unsigned long shadow_free; >> =A0 =A0 =A0 unsigned int feature_flush; >> =A0 =A0 =A0 unsigned int flush_op; >> + =A0 =A0 unsigned int feature_trim; >> + =A0 =A0 unsigned int discard_granularity; >> + =A0 =A0 unsigned int discard_alignment; >> =A0 =A0 =A0 int is_ready; >> =A0}; >> >> @@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req= ) >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 ring_req->operation =3D info->flush_op; >> =A0 =A0 =A0 } >> >> - =A0 =A0 ring_req->nr_segments =3D blk_rq_map_sg(req->q, req, info->sg)= ; >> - =A0 =A0 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)= ; >> + =A0 =A0 if (unlikely(req->cmd_flags & REQ_DISCARD)) { >> + =A0 =A0 =A0 =A0 =A0 =A0 /* id, sector_number and handle are set above.= */ >> + =A0 =A0 =A0 =A0 =A0 =A0 ring_req->operation =3D BLKIF_OP_TRIM; >> + =A0 =A0 =A0 =A0 =A0 =A0 ring_req->nr_segments =3D 0; >> + =A0 =A0 =A0 =A0 =A0 =A0 ring_req->u.trim.nr_sectors =3D blk_rq_sectors= (req); >> + =A0 =A0 } else { >> + =A0 =A0 =A0 =A0 =A0 =A0 ring_req->nr_segments =3D blk_rq_map_sg(req->q= , req, info->sg); >> + =A0 =A0 =A0 =A0 =A0 =A0 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGME= NTS_PER_REQUEST); >> >> - =A0 =A0 for_each_sg(info->sg, sg, ring_req->nr_segments, i) { >> - =A0 =A0 =A0 =A0 =A0 =A0 buffer_mfn =3D pfn_to_mfn(page_to_pfn(sg_page(= sg))); >> - =A0 =A0 =A0 =A0 =A0 =A0 fsect =3D sg->offset >> 9; >> - =A0 =A0 =A0 =A0 =A0 =A0 lsect =3D fsect + (sg->length >> 9) - 1; >> - =A0 =A0 =A0 =A0 =A0 =A0 /* install a grant reference. */ >> - =A0 =A0 =A0 =A0 =A0 =A0 ref =3D gnttab_claim_grant_reference(&gref_hea= d); >> - =A0 =A0 =A0 =A0 =A0 =A0 BUG_ON(ref =3D=3D -ENOSPC); >> + =A0 =A0 =A0 =A0 =A0 =A0 for_each_sg(info->sg, sg, ring_req->nr_segment= s, i) { >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 buffer_mfn =3D pfn_to_mfn(page= _to_pfn(sg_page(sg))); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 fsect =3D sg->offset >> 9; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lsect =3D fsect + (sg->length = >> 9) - 1; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* install a grant reference. = */ >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ref =3D gnttab_claim_grant_ref= erence(&gref_head); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 BUG_ON(ref =3D=3D -ENOSPC); >> >> - =A0 =A0 =A0 =A0 =A0 =A0 gnttab_grant_foreign_access_ref( >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ref, >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->xbdev->o= therend_id, >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 buffer_mfn, >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 rq_data_dir(re= q) ); >> - >> - =A0 =A0 =A0 =A0 =A0 =A0 info->shadow[id].frame[i] =3D mfn_to_pfn(buffe= r_mfn); >> - =A0 =A0 =A0 =A0 =A0 =A0 ring_req->u.rw.seg[i] =3D >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 (struct blkif_= request_segment) { >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 .gref =A0 =A0 =A0 =3D ref, >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 .first_sect =3D fsect, >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 .last_sect =A0=3D lsect }; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 gnttab_grant_foreign_access_re= f( >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 ref, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 info->xbdev->otherend_id, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 buffer_mfn, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 rq_data_dir(req)); >> + >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->shadow[id].frame[i] =3D = mfn_to_pfn(buffer_mfn); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ring_req->u.rw.seg[i] =3D >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 (struct blkif_request_segment) { >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 =A0 =A0 =A0 =A0 .gref =A0 =A0 =A0 =3D ref, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 =A0 =A0 =A0 =A0 .first_sect =3D fsect, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 =A0 =A0 =A0 =A0 .last_sect =A0=3D lsect }; >> + =A0 =A0 =A0 =A0 =A0 =A0 } >> =A0 =A0 =A0 } >> >> =A0 =A0 =A0 info->ring.req_prod_pvt++; >> @@ -399,6 +409,7 @@ wait: >> =A0static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) >> =A0{ >> =A0 =A0 =A0 struct request_queue *rq; >> + =A0 =A0 struct blkfront_info *info =3D gd->private_data; >> >> =A0 =A0 =A0 rq =3D blk_init_queue(do_blkif_request, &blkif_io_lock); >> =A0 =A0 =A0 if (rq =3D=3D NULL) >> @@ -406,6 +417,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd,= u16 >> sector_size) >> >> =A0 =A0 =A0 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); >> >> + =A0 =A0 if (info->feature_trim) { >> + =A0 =A0 =A0 =A0 =A0 =A0 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq= ); >> + =A0 =A0 =A0 =A0 =A0 =A0 blk_queue_max_discard_sectors(rq, get_capacity= (gd)); >> + =A0 =A0 =A0 =A0 =A0 =A0 rq->limits.discard_granularity =3D info->disca= rd_granularity; >> + =A0 =A0 =A0 =A0 =A0 =A0 rq->limits.discard_alignment =3D info->discard= _alignment; > > Don't you also need to set rq->limits.max_discard_sectors here (since > when zero blkdev_issue_discard() doesn't do anything)? And wouldn't > that need to be propagated from the backend, too? the max_discard_sectors are set by blk_queue_max_discard_sectors() above ;-= ) rq->limits.max_discard_sectors is the full phy device size, and if we only assign a partition to guest, the number is incorrect for guest, so the max_discard_sectors should be the capacity the guest will see, Thanks > >> + =A0 =A0 } >> + >> =A0 =A0 =A0 /* Hard sector size and max sectors impersonate the equiv. h= ardware. */ >> =A0 =A0 =A0 blk_queue_logical_block_size(rq, sector_size); >> =A0 =A0 =A0 blk_queue_max_hw_sectors(rq, 512); >> @@ -722,6 +740,19 @@ static irqreturn_t blkif_interrupt(int irq, void >> *dev_id) >> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 error =3D (bret->status =3D=3D BLKIF_RSP_OKA= Y) ? 0 : -EIO; >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 switch (bret->operation) { >> + =A0 =A0 =A0 =A0 =A0 =A0 case BLKIF_OP_TRIM: >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (unlikely(bret->status =3D= =3D BLKIF_RSP_EOPNOTSUPP)) { >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct request= _queue *rq =3D info->rq; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 printk(KERN_WA= RNING "blkfront: %s: trim op failed\n", >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 = =A0 =A0 =A0info->gd->disk_name); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 error =3D -EOP= NOTSUPP; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->feature_= trim =3D 0; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(rq->= queue_lock); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 queue_flag_cle= ar(QUEUE_FLAG_DISCARD, rq); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(rq= ->queue_lock); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 } >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 __blk_end_request_all(req, err= or); >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 break; >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 case BLKIF_OP_FLUSH_DISKCACHE: >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 case BLKIF_OP_WRITE_BARRIER: >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (unlikely(bret->status = =3D=3D BLKIF_RSP_EOPNOTSUPP)) { >> @@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info) >> =A0 =A0 =A0 bdput(bdev); >> =A0} >> >> +static void blkfront_setup_trim(struct blkfront_info *info) >> +{ >> + =A0 =A0 int err; >> + =A0 =A0 char *type; >> + =A0 =A0 unsigned int discard_granularity; >> + =A0 =A0 unsigned int discard_alignment; >> + >> + =A0 =A0 type =3D xenbus_read(XBT_NIL, info->xbdev->otherend, "type", N= ULL); >> + =A0 =A0 if (IS_ERR(type)) >> + =A0 =A0 =A0 =A0 =A0 =A0 return; >> + >> + =A0 =A0 if (strncmp(type, "phy", 3) =3D=3D 0) { >> + =A0 =A0 =A0 =A0 =A0 =A0 err =3D xenbus_gather(XBT_NIL, info->xbdev->ot= herend, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "discard-granularity", "%u", &= discard_granularity, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "discard-alignment", "%u", &di= scard_alignment, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 NULL); > > Let me repeat my wish to have these nodes start with "trim-" rather > than "discard-", so they can be easily associated with the "feature-trim" > one. > > Jan > >> + =A0 =A0 =A0 =A0 =A0 =A0 if (!err) { >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->feature_trim =3D 1; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->discard_granularity =3D = discard_granularity; >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->discard_alignment =3D di= scard_alignment; >> + =A0 =A0 =A0 =A0 =A0 =A0 } >> + =A0 =A0 } else if (strncmp(type, "file", 4) =3D=3D 0) >> + =A0 =A0 =A0 =A0 =A0 =A0 info->feature_trim =3D 1; >> + >> + =A0 =A0 kfree(type); >> +} >> + >> =A0/* >> =A0 * Invoked when the backend is finally 'ready' (and has told produced >> =A0 * the details about the physical device - #sectors, size, etc). >> @@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info >> *info) >> =A0 =A0 =A0 unsigned long sector_size; >> =A0 =A0 =A0 unsigned int binfo; >> =A0 =A0 =A0 int err; >> - =A0 =A0 int barrier, flush; >> + =A0 =A0 int barrier, flush, trim; >> >> =A0 =A0 =A0 switch (info->connected) { >> =A0 =A0 =A0 case BLKIF_STATE_CONNECTED: >> @@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info >> *info) >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->feature_flush =3D REQ_FLUSH; >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 info->flush_op =3D BLKIF_OP_FLUSH_DISKCACHE; >> =A0 =A0 =A0 } >> - >> + >> + =A0 =A0 err =3D xenbus_gather(XBT_NIL, info->xbdev->otherend, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "feature-trim", "%d", = &trim, >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 NULL); >> + >> + =A0 =A0 if (!err && trim) >> + =A0 =A0 =A0 =A0 =A0 =A0 blkfront_setup_trim(info); >> + >> =A0 =A0 =A0 err =3D xlvbd_alloc_gendisk(sectors, info, binfo, sector_siz= e); >> =A0 =A0 =A0 if (err) { >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 xenbus_dev_fatal(info->xbdev, err, "xlvbd_ad= d at %s", > > > >