From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from int-mailstore01.merit.edu ([207.75.116.232]:53819 "EHLO int-mailstore01.merit.edu" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755936Ab1FGR3s (ORCPT ); Tue, 7 Jun 2011 13:29:48 -0400 Date: Tue, 7 Jun 2011 13:29:46 -0400 From: Jim Rees To: Benny Halevy Cc: linux-nfs@vger.kernel.org, peter honeyman Subject: [PATCH 33/88] pnfsblock: bl_write_pagelist Message-ID: <15ecb4872bab4715ed7a275160e36f82e23c9e89.1307464382.git.rees@umich.edu> References: Content-Type: text/plain; charset=us-ascii In-Reply-To: Sender: linux-nfs-owner@vger.kernel.org List-ID: MIME-Version: 1.0 From: Fred Isaman Note: When upper layer's read/write request cannot be fulfilled, the block layout driver shouldn't silently mark the page as error. It should do what can be done and leave the rest to the upper layer. To do so, we should set rdata/wdata->res.count properly. When upper layer re-send the read/write request to finish the rest part of the request, pgbase is the position where we should start at. Signed-off-by: Fred Isaman [pnfsblock: handle errors when read or write pagelist.] Signed-off-by: Zhang Jingwang Signed-off-by: Benny Halevy --- fs/nfs/blocklayout/blocklayout.c | 141 ++++++++++++++++++++++++++++++++++++- 1 files changed, 137 insertions(+), 4 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9c46f5a..df0cfe4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -335,9 +335,69 @@ bl_read_pagelist(struct pnfs_layout_type *lo, return PNFS_NOT_ATTEMPTED; } -/* FRED - It can indicate bytes written in wdata->res.count. - * It can indicate error status in wdata->task.tk_status. +/* STUB - this needs thought */ +static inline void +bl_done_with_wpage(struct page *page, const int ok) +{ + if (!ok) { + SetPageError(page); + SetPagePnfsErr(page); + /* This is an inline copy of nfs_zap_mapping */ + /* This is oh so fishy, and needs deep thought */ + if (page->mapping->nrpages != 0) { + struct inode *inode = page->mapping->host; + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); + } + } + /* end_page_writeback called in rpc_release. Should be done here. */ +} + +/* This is basically copied from mpage_end_io_read */ +static void bl_end_io_write(struct bio *bio, int err) +{ + void *data = bio->bi_private; + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + bl_done_with_wpage(page, uptodate); + } while (bvec >= bio->bi_io_vec); + bio_put(bio); + put_parallel(data); +} + +/* Function scheduled for call during bl_end_par_io_write, + * it marks sectors as written and extends the commitlist. */ +static void bl_write_cleanup(struct work_struct *work) +{ + struct rpc_task *task; + struct nfs_write_data *wdata; + dprintk("%s enter\n", __func__); + task = container_of(work, struct rpc_task, u.tk_work); + wdata = container_of(task, struct nfs_write_data, task); + pnfs_callback_ops->nfs_writelist_complete(wdata); +} + +/* Called when last of bios associated with a bl_write_pagelist call finishes */ +static void +bl_end_par_io_write(void *data) +{ + struct nfs_write_data *wdata = data; + + /* STUB - ignoring error handling */ + wdata->task.tk_status = 0; + wdata->verf.committed = NFS_FILE_SYNC; + INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); + schedule_work(&wdata->task.u.tk_work); +} + static enum pnfs_try_status bl_write_pagelist(struct pnfs_layout_type *lo, struct page **pages, @@ -348,8 +408,81 @@ bl_write_pagelist(struct pnfs_layout_type *lo, int sync, struct nfs_write_data *wdata) { - dprintk("%s enter - just using nfs\n", __func__); - return PNFS_NOT_ATTEMPTED; + int i; + struct bio *bio = NULL; + struct pnfs_block_extent *be = NULL; + sector_t isect, extent_length = 0; + struct parallel_io *par; + int pg_index = pgbase >> PAGE_CACHE_SHIFT; + + dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + if (!test_bit(PG_USE_PNFS, &wdata->req->wb_flags)) { + dprintk("PG_USE_PNFS not set\n"); + return PNFS_NOT_ATTEMPTED; + } + if (dont_like_caller(wdata->req)) { + dprintk("%s dont_like_caller failed\n", __func__); + return PNFS_NOT_ATTEMPTED; + } + /* At this point, wdata->pages is a (sequential) list of nfs_pages. + * We want to write each, and if there is an error remove it from + * list and call + * nfs_retry_request(req) to have it redone using nfs. + * QUEST? Do as block or per req? Think have to do per block + * as part of end_bio + */ + par = alloc_parallel(wdata); + if (!par) + return PNFS_NOT_ATTEMPTED; + par->call_ops = *wdata->pdata.call_ops; + par->call_ops.rpc_call_done = bl_rpc_do_nothing; + par->pnfs_callback = bl_end_par_io_write; + /* At this point, have to be more careful with error handling */ + + isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> 9); + for (i = pg_index; i < nr_pages; i++) { + if (!extent_length) { + /* We've used up the previous extent */ + put_extent(be); + bio = bl_submit_bio(WRITE, bio); + /* Get the next one */ + be = find_get_extent(BLK_LSEG2EXT(wdata->pdata.lseg), + isect, NULL); + if (!be || !is_writable(be, isect)) { + /* FIXME */ + bl_done_with_wpage(pages[i], 0); + break; + } + extent_length = be->be_length - + (isect - be->be_f_offset); + } + for (;;) { + if (!bio) { + bio = bio_alloc(GFP_NOIO, nr_pages - i); + if (!bio) { + /* Error out this page */ + /* FIXME */ + bl_done_with_wpage(pages[i], 0); + break; + } + bio->bi_sector = isect - be->be_f_offset + + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = bl_end_io_write; + bio->bi_private = par; + } + if (bio_add_page(bio, pages[i], PAGE_SIZE, 0)) + break; + bio = bl_submit_bio(WRITE, bio); + } + isect += PAGE_CACHE_SIZE >> 9; + extent_length -= PAGE_CACHE_SIZE >> 9; + } + wdata->res.count = (isect << 9) - (offset & (long)PAGE_CACHE_MASK); + put_extent(be); + bl_submit_bio(WRITE, bio); + put_parallel(par); + return PNFS_ATTEMPTED; } /* FIXME - range ignored */ -- 1.7.4.1