From: Jim Rees <rees@umich.edu>
To: Benny Halevy <bhalevy@panasas.com>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: [PATCH 29/88] pnfsblock: write_begin
Date: Tue, 7 Jun 2011 13:29:16 -0400 [thread overview]
Message-ID: <9f0047679d383a01f328c19d56ea5967f39b7ae6.1307464382.git.rees@umich.edu> (raw)
In-Reply-To: <cover.1307464381.git.rees@umich.edu>
From: Fred Isaman <iisaman@citi.umich.edu>
Implements bl_write_begin and bl_do_flush, allowing the block driver to read
in the parts of a page "around" the data that is about to be copied to the page.
[pnfsblock: fix 64-bit compiler warnings for write_begin]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
fs/nfs/blocklayout/blocklayout.c | 196 ++++++++++++++++++++++++++++++++++++++
1 files changed, 196 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 99de9e3..b3ad99d 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/buffer_head.h> /* various write calls */
#include <linux/bio.h> /* struct bio */
#include <linux/vmalloc.h>
#include "blocklayout.h"
@@ -637,6 +638,186 @@ bl_uninitialize_mountpoint(struct pnfs_mount_type *mtype)
return 0;
}
+/* STUB - meant to mark the intersection of the layout and the page as
+ * bad so that it is not used again.  Currently a no-op.
+ */
+static void mark_bad_read(void)
+{
+}
+
+/* Copied from buffer.c.
+ * Record the outcome of a read in the buffer's uptodate flag, then
+ * unlock the buffer.
+ */
+static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+{
+	if (!uptodate) {
+		/* This happens, due to failed READA attempts. */
+		clear_buffer_uptodate(bh);
+	} else {
+		set_buffer_uptodate(bh);
+	}
+	unlock_buffer(bh);
+}
+
+/* Copied from buffer.c.
+ * Read-completion handler: passes bh and uptodate unchanged to
+ * __end_buffer_read_notouch().  init_page_for_write() installs it as
+ * bh->b_end_io before submitting the read.
+ */
+static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
+{
+ __end_buffer_read_notouch(bh, uptodate);
+}
+
+/*
+ * map_block: map a requested I/O block (isect) into an offset in the LVM
+ * meta block_device
+ *
+ * @isect: sector being requested
+ * @be:    extent supplying the target device (be_mdev) and the
+ *         be_f_offset / be_v_offset pair used to translate isect
+ * @bh:    buffer head to fill in: it is marked mapped and its b_bdev and
+ *         b_blocknr are set
+ *
+ * The translated sector is shifted right by (i_blkbits - 9), presumably
+ * converting 512-byte sectors into blocks of the meta device.
+ */
+static void
+map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
+{
+	dprintk("%s enter be=%p\n", __func__, be);
+
+	set_buffer_mapped(bh);
+	bh->b_bdev = be->be_mdev;
+	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
+		(be->be_mdev->bd_inode->i_blkbits - 9);
+
+	/* Print sector values as unsigned long long so they are not
+	 * truncated where sector_t is wider than long; b_size is printed
+	 * with the unsigned size_t specifier.
+	 */
+	dprintk("%s isect %llu, bh->b_blocknr %llu, using bsize %zu\n",
+		__func__, (unsigned long long)isect,
+		(unsigned long long)bh->b_blocknr,
+		bh->b_size);
+}
+
+/* Given an unmapped page, zero it (or read in page for COW),
+ * and set appropriate flags/markings, but it is safe to not initialize
+ * the range given in [from, to).
+ *
+ * @bl:            block layout searched for the extent covering the page
+ * @page:          page about to be written
+ * @from, @to:     byte range within the page that the caller will fill
+ * @pages_to_mark: passed through to mark_initialized_sectors()
+ *
+ * Returns 0 on success.  On failure returns -ENOMEM if no buffer head
+ * could be allocated, -EIO if no extent was found, the extent is a
+ * NONE_DATA hole, or the read did not leave the buffer uptodate, or the
+ * nonzero result of mark_initialized_sectors().  Every failure also
+ * calls mark_bad_read().
+ */
+/* This is loosely based on nobh_write_begin */
+static int
+init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
+		    unsigned from, unsigned to, sector_t **pages_to_mark)
+{
+	struct buffer_head *bh;
+	int inval, ret = -EIO;
+	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
+	sector_t isect;
+
+	dprintk("%s enter, %p\n", __func__, page);
+	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
+	if (!bh) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
+	be = find_get_extent(bl, isect, &cow_read);
+	if (!be)
+		goto cleanup;
+	inval = is_hole(be, isect);
+	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
+	if (inval) {
+		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
+			dprintk("%s PANIC - got NONE_DATA extent %p\n",
+				__func__, be);
+			goto cleanup;
+		}
+		map_block(isect, be, bh);
+		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+	}
+	if (PageUptodate(page)) {
+		/* Do nothing */
+	} else if (inval && !cow_read) {
+		/* Hole with no COW source: zero everything outside
+		 * [from, to).  Logical AND, so the test does not depend on
+		 * is_hole() returning exactly 1.
+		 */
+		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
+	} else if (0 < from || PAGE_CACHE_SIZE > to) {
+		struct pnfs_block_extent *read_extent;
+
+		/* Partial-page write: read the existing data, from the COW
+		 * extent when the page is a hole, otherwise from be.
+		 */
+		read_extent = (inval && cow_read) ? cow_read : be;
+		map_block(isect, read_extent, bh);
+		lock_buffer(bh);
+		bh->b_end_io = end_buffer_read_nobh;
+		submit_bh(READ, bh);
+		dprintk("%s: Waiting for buffer read\n", __func__);
+		/* XXX Don't really want to hold layout lock here */
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh))
+			goto cleanup;
+	}
+	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+		/* There is a BUG here if is a short copy after write_begin,
+		 * but I think this is a generic fs bug.  The problem is that
+		 * we have marked the page as initialized, but it is possible
+		 * that the section not copied may never get copied.
+		 */
+		ret = mark_initialized_sectors(be->be_inval, isect,
+					       PAGE_CACHE_SECTORS,
+					       pages_to_mark);
+		/* Want to preallocate mem so above can't fail */
+		if (ret)
+			goto cleanup;
+	}
+	SetPageMappedToDisk(page);
+	ret = 0;
+
+cleanup:
+	/* bh is NULL when alloc_page_buffers() failed; do not hand a NULL
+	 * buffer head to free_buffer_head().
+	 */
+	if (bh)
+		free_buffer_head(bh);
+	put_extent(be);
+	put_extent(cow_read);
+	if (ret) {
+		/* Need to mark layout with bad read...should now
+		 * just use nfs4 for reads and writes.
+		 */
+		mark_bad_read();
+	}
+	return ret;
+}
+
+/* Prepare a page for a write of count bytes at pos through the block
+ * layout.
+ *
+ * Always returns 0.  Whether the write may go through pNFS is reported
+ * in fsdata->ok_to_use_pnfs: it is cleared when the layout block size is
+ * too small or when init_page_for_write() fails, and set otherwise.  On
+ * success of init_page_for_write(), fsdata->private receives the
+ * pages_to_mark pointer.
+ */
+static int
+bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
+	       unsigned count, struct pnfs_fsdata *fsdata)
+{
+	unsigned from, to;
+	int ret;
+	sector_t *pages_to_mark = NULL;
+	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
+
+	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
+	print_page(page);
+	/* The following code assumes blocksize >= PAGE_CACHE_SIZE.
+	 * bl_blocksize is compared against PAGE_CACHE_SIZE >> 9, so it is
+	 * presumably expressed in 512-byte sectors.
+	 */
+	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
+		dprintk("%s Can't handle blocksize %llu\n", __func__,
+			(u64)bl->bl_blocksize);
+		fsdata->ok_to_use_pnfs = 0;
+		return 0;
+	}
+	fsdata->ok_to_use_pnfs = 1;
+	if (PageMappedToDisk(page)) {
+		/* Basically, this is a flag that says we have
+		 * successfully called write_begin already on this page.
+		 */
+		/* NOTE - there are cache consistency issues here.
+		 * For example, what if the layout is recalled, then regained?
+		 * If the file is closed and reopened, will the page flags
+		 * be reset?  If not, we'll have to use layout info instead of
+		 * the page flag.
+		 */
+		return 0;
+	}
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + count;
+	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
+	if (ret) {
+		/* Terminate the message with a newline so it is not merged
+		 * with whatever is logged next.
+		 */
+		dprintk("%s init page failed with %i\n", __func__, ret);
+		/* Revert back to plain NFS and just continue on with
+		 * write.  This assumes there is no request attached, which
+		 * should be true if we get here.
+		 */
+		BUG_ON(PagePrivate(page));
+		fsdata->ok_to_use_pnfs = 0;
+		kfree(pages_to_mark);
+		ret = 0;
+	} else {
+		fsdata->private = pages_to_mark;
+	}
+	return ret;
+}
+
static ssize_t
bl_get_stripesize(struct pnfs_layout_type *lo)
{
@@ -663,10 +844,24 @@ bl_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return 1;
}
+/* This checks if old req will likely use same io method as soon
+ * to be created request, and returns False if they are the same.
+ *
+ * The upcoming request is expected to try pNFS when
+ * fsdata->ok_to_use_pnfs is set or, if no fsdata is supplied, when a
+ * layout segment exists.  That expectation is compared with the
+ * PG_USE_PNFS bit of req->wb_flags; a nonzero return means they differ.
+ */
+static int
+bl_do_flush(struct pnfs_layout_segment *lseg, struct nfs_page *req,
+	    struct pnfs_fsdata *fsdata)
+{
+	int will_try_pnfs, req_uses_pnfs;
+
+	dprintk("%s enter\n", __func__);
+	/* Normalize both sides to 0/1 so that any nonzero flag value
+	 * compares as "true" rather than by its raw integer value.
+	 */
+	will_try_pnfs = fsdata ? !!fsdata->ok_to_use_pnfs : (lseg != NULL);
+	req_uses_pnfs = !!test_bit(PG_USE_PNFS, &req->wb_flags);
+	return will_try_pnfs != req_uses_pnfs;
+}
+
static struct layoutdriver_io_operations blocklayout_io_operations = {
.commit = bl_commit,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
+ .write_begin = bl_write_begin,
.alloc_layout = bl_alloc_layout,
.free_layout = bl_free_layout,
.alloc_lseg = bl_alloc_lseg,
@@ -683,6 +878,7 @@ static struct layoutdriver_policy_operations blocklayout_policy_operations = {
.get_read_threshold = bl_get_io_threshold,
.get_write_threshold = bl_get_io_threshold,
.pg_test = bl_pg_test,
+ .do_flush = bl_do_flush,
};
static struct pnfs_layoutdriver_type blocklayout_type = {
--
1.7.4.1
next prev parent reply other threads:[~2011-06-07 17:29 UTC|newest]
Thread overview: 136+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-07 17:24 [PATCH 00/88] pnfs block layout driver rees
2011-06-07 17:26 ` [PATCH 01/88] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-07 17:26 ` [PATCH 02/88] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-07 17:26 ` [PATCH 03/88] pnfs_post_submit: Restore "pnfs: pnfs_do_flush" part 1 Jim Rees
2011-06-07 17:26 ` [PATCH 04/88] pnfs_post_submit: Restore the pnfs_write_end part of "pnfs: commit and pnfs_write_end" Jim Rees
2011-06-07 17:26 ` [PATCH 05/88] pnfs: xdr support for three word attribute bitmap Jim Rees
2011-06-07 17:26 ` [PATCH 06/88] pnfs: HACK: ask for layout_blksize on mount Jim Rees
2011-06-07 17:26 ` [PATCH 07/88] pnfs: HACK: modify write_end_cleanup Jim Rees
2011-06-07 17:26 ` [PATCH 08/88] HACK: propagate fsdata into nfs_writepage_setup Jim Rees
2011-06-07 17:26 ` [PATCH 09/88] pnfs: HACK: adjust eof handling Jim Rees
2011-06-07 17:27 ` [PATCH 10/88] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-07 17:27 ` [PATCH 11/88] pnfsblock: blocklayout stub Jim Rees
2011-06-07 17:27 ` [PATCH 12/88] pnfsblock: expose scsi interface Jim Rees
2011-06-07 17:27 ` [PATCH 13/88] pnfsblock: scan scsi devices Jim Rees
2011-06-07 17:27 ` [PATCH 14/88] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-07 17:27 ` [PATCH 15/88] pnfsblock: dm kernel interface Jim Rees
2011-06-07 17:27 ` [PATCH 16/88] pnfsblock: select BLK_DEV_DM when PNFS_BLOCK is configured Jim Rees
2011-06-07 17:27 ` [PATCH 17/88] pnfsblock: create and destroy dm metadevice Jim Rees
2011-06-07 17:27 ` [PATCH 18/88] pnfsblock: construct and load md table Jim Rees
2011-06-07 17:28 ` [PATCH 19/88] pnfsblock: layout alloc and free Jim Rees
2011-06-07 17:28 ` [PATCH 20/88] pnfsblock: basic extent code Jim Rees
2011-06-07 17:28 ` [PATCH 21/88] pnfsblock: lseg alloc and free Jim Rees
2011-06-07 17:28 ` [PATCH 22/88] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-07 17:28 ` [PATCH 23/88] pnfsblock: merge extents Jim Rees
2011-06-07 17:28 ` [PATCH 24/88] pnfsblock: find_get_extent Jim Rees
2011-06-07 17:28 ` [PATCH 25/88] pnfsblock: bl_read_pagelist Jim Rees
2011-06-07 17:28 ` [PATCH 26/88] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-07 17:29 ` [PATCH 27/88] pnfsblock: read path error handling Jim Rees
2011-06-07 17:29 ` [PATCH 28/88] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-07 17:29 ` Jim Rees [this message]
2011-06-07 17:29 ` [PATCH 30/88] pnfsblock: write_end Jim Rees
2011-06-07 17:29 ` [PATCH 31/88] pnfsblock: write_end_cleanup Jim Rees
2011-06-07 17:29 ` [PATCH 32/88] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-07 17:29 ` [PATCH 33/88] pnfsblock: bl_write_pagelist Jim Rees
2011-06-07 17:29 ` [PATCH 34/88] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 35/88] pnfsblock: bl_setup_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 36/88] pnfsblock: encode_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 37/88] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 38/88] pnfsblock: merge rw extents Jim Rees
2011-06-07 17:30 ` [PATCH 39/88] pnfsblock: debugging dprintks for clist info Jim Rees
2011-06-07 17:30 ` [PATCH 40/88] SQAUSHME: blocklayoutdriver: NULL pointer reference when committing too many extents Jim Rees
2011-06-07 17:30 ` [PATCH 41/88] SQUASHME: pnfs-block: remove of CONFIG_PNFS fallout Jim Rees
2011-06-07 17:30 ` [PATCH 42/88] SQUASHME: pnfsblock: Fix a memory leak Jim Rees
2011-06-07 17:31 ` [PATCH 43/88] SQUASHME: pnfsblock: fix bug when decoding block device info Jim Rees
2011-06-07 17:31 ` [PATCH 44/88] SQUASHME: pnfsblock: Wrong extent refcount in block extents list Jim Rees
2011-06-07 17:31 ` [PATCH 45/88] SQUASHME: pnfsblock: Implement release_inval_marks Jim Rees
2011-06-07 17:31 ` [PATCH 46/88] SQUASHME: pnfsblock: Fix missing extent in commit list Jim Rees
2011-06-07 17:31 ` [PATCH 47/88] pnfsblock: use the session max response size for getdeviceinfo's maxcount Jim Rees
2011-06-07 17:31 ` [PATCH 48/88] SQUASHME: pnfs-block: fix compile breakage Jim Rees
2011-06-07 17:31 ` [PATCH 49/88] SQUASHME: pnfs-block: convert APIs pnfs-post-submit Jim Rees
2011-06-07 17:32 ` [PATCH 50/88] pnfsblock: Lookup list entry of layouts and tags in reverse order Jim Rees
2011-06-07 17:32 ` [PATCH 51/88] pnfsblock: expose block_class interface Jim Rees
2011-06-07 17:32 ` [PATCH 52/88] pnfsblock: iterating all local block disks instead of only scsi disks when initializing mount point Jim Rees
2011-06-07 17:32 ` [PATCH 53/88] SQUASHME: pnfsblock: set pnfs_blksize before calling set_pnfs_layoutdriver Jim Rees
2011-06-07 17:32 ` [PATCH 54/88] SQUASHME: pnfsblock: get rid of threshold policy ops Jim Rees
2011-06-07 17:32 ` [PATCH 55/88] SQUASHME: pnfsblock: write_begin adjust for removed fields Jim Rees
2011-06-07 17:32 ` [PATCH 56/88] SQUASHME: pnfsblock: write_end adjust for removed ok_to_use_pnfs Jim Rees
2011-06-07 17:32 ` [PATCH 57/88] SQUASHME: pnfsblock: write_end_cleanup " Jim Rees
2011-06-07 17:32 ` [PATCH 58/88] SQUASHME: pnfsblock: bl_write_pagelist support functions adjust for missing PG_USE_PNFS Jim Rees
2011-06-07 17:33 ` [PATCH 59/88] SQUASHME: pnfsblock: bl_write_pagelist " Jim Rees
2011-06-07 17:33 ` [PATCH 60/88] SQUASHME: pnfs-block: nfs4_blk_add_block_disk ret must be signed Jim Rees
2011-06-07 17:33 ` [PATCH 61/88] SQUASHME: pnfs-block: use new alloc/free_layout API Jim Rees
2011-06-07 17:33 ` [PATCH 62/88] SQUASHME: pnfs-block: use new commit api Jim Rees
2011-06-07 17:33 ` [PATCH 63/88] SQUASHME: pnfs-block: use new read_pagelist api Jim Rees
2011-06-07 17:33 ` [PATCH 64/88] SQUASHME: pnfs-block: use new write_pagelist api Jim Rees
2011-06-07 17:33 ` [PATCH 65/88] pnfs-block: Add support for simple rpc pipefs Jim Rees
2011-06-07 17:33 ` [PATCH 66/88] pnfs-block: Remove device creation from kernel Jim Rees
2011-06-07 17:33 ` [PATCH 67/88] SQUASHME: pnfs-block: apply types rename Jim Rees
2011-06-07 17:34 ` [PATCH 68/88] SQUASHME: pnfs-block: Revert "pnfsblock: expose block_class interface" Jim Rees
2011-06-07 17:34 ` [PATCH 69/88] SQUASHME: pnfsblock: remove obsolete include file from blocklayout.h Jim Rees
2011-06-07 17:34 ` [PATCH 70/88] SQUASHME: pnfsblock: use nfs4_deviceid Jim Rees
2011-06-07 17:34 ` [PATCH 71/88] SQUASHME: pnfsblock: no callback ops Jim Rees
2011-06-07 17:34 ` [PATCH 72/88] SQAUSHME: pnfsblock: no PNFS_NFS_SERVER Jim Rees
2011-06-07 17:34 ` [PATCH 73/88] SQUASHME: pnfsblock: no dev_notify_types Jim Rees
2011-06-07 17:34 ` [PATCH 74/88] SQUASHME: pnfsblock: use new struct pnfs_layout_hdr Jim Rees
2011-06-07 17:34 ` [PATCH 75/88] SQUASHME: pnfsblock: compile error in blocklayout code Jim Rees
2011-06-07 17:34 ` [PATCH 76/88] SQUASHME: pnfs-block: deprecate get_stripesize Jim Rees
2011-06-07 17:35 ` [PATCH 77/88] move include lines out of include file Jim Rees
2011-06-07 17:35 ` [PATCH 78/88] SQUASHME: pnfs-block: use {set,clear}_layoutdriver Jim Rees
2011-06-07 17:35 ` [PATCH 79/88] SQUASHME: pnfs-block: Return failure from bl_initialize_mountpoint Jim Rees
2011-06-07 17:35 ` [PATCH 80/88] SQUASHME: pnfs-block: fixup setup_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 81/88] SQUASHME: pnfs-block: fixup cleanup_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 82/88] SQUASHME: pnfs-block: fixup encode_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 83/88] SQUASHME: pnfs-block: fixup layoutcommit methods args Jim Rees
2011-06-07 17:35 ` [PATCH 84/88] pnfs-block: fix blocklayoutdev.c for new blkdev_get_by_dev() Jim Rees
2011-06-07 17:35 ` [PATCH 85/88] SQUASHME: pnfs-block: use pnfs_layout_hdr field prefix Jim Rees
2011-06-07 17:35 ` [PATCH 86/88] SQUASHME: pnfs: blocklayout: port block layout code Jim Rees
2011-06-08 1:27 ` Benny Halevy
2011-06-08 2:06 ` Benny Halevy
2011-06-08 7:38 ` Peng Tao
2011-06-07 17:36 ` [PATCH 87/88] Add configurable prefetch size for layoutget Jim Rees
2011-06-08 2:01 ` Benny Halevy
2011-06-08 2:18 ` Jim Rees
2011-06-08 7:15 ` Peng Tao
2011-06-09 6:06 ` Benny Halevy
2011-06-09 11:49 ` Jim Rees
2011-06-09 13:32 ` Benny Halevy
2011-06-09 13:58 ` Jim Rees
2011-06-09 15:07 ` Peng Tao
2011-06-09 21:22 ` Benny Halevy
2011-06-10 6:00 ` tao.peng
2011-06-10 12:33 ` Benny Halevy
2011-06-10 14:09 ` tao.peng
2011-06-10 19:23 ` Benny Halevy
2011-06-10 20:03 ` Fred Isaman
2011-06-10 21:15 ` Benny Halevy
2011-06-11 1:46 ` Peng Tao
2011-06-10 23:20 ` Boaz Harrosh
2011-06-11 2:19 ` Peng Tao
2011-06-12 14:40 ` Boaz Harrosh
2011-06-12 18:46 ` Peng Tao
2011-06-11 1:35 ` Peng Tao
2011-06-09 21:23 ` Benny Halevy
2011-06-10 5:36 ` tao.peng
2011-06-10 12:36 ` Benny Halevy
2011-06-10 14:17 ` tao.peng
2011-06-10 19:02 ` Benny Halevy
2011-06-09 15:01 ` Peng Tao
2011-06-09 14:54 ` Peng Tao
2011-06-09 21:30 ` Benny Halevy
2011-06-10 6:02 ` tao.peng
2011-06-10 12:47 ` Benny Halevy
2011-06-10 14:30 ` tao.peng
2011-06-10 19:07 ` Benny Halevy
2011-06-10 16:23 ` Boaz Harrosh
2011-06-10 16:44 ` Boaz Harrosh
2011-06-09 6:08 ` Benny Halevy
2011-06-07 17:36 ` [PATCH 88/88] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-08 2:05 ` Benny Halevy
2011-06-08 7:06 ` Peng Tao
2011-06-08 7:29 ` Peng Tao
2011-06-09 21:52 ` [PATCH 00/88] pnfs block layout driver Boaz Harrosh
2011-06-09 22:15 ` Jim Rees
2011-06-10 2:16 ` Boaz Harrosh
2011-06-10 2:20 ` Boaz Harrosh
2011-06-10 4:04 ` Benny Halevy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9f0047679d383a01f328c19d56ea5967f39b7ae6.1307464382.git.rees@umich.edu \
--to=rees@umich.edu \
--cc=bhalevy@panasas.com \
--cc=honey@citi.umich.edu \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).