All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jim Rees <rees@umich.edu>
To: Benny Halevy <bhalevy@panasas.com>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: [PATCH 26/33] pnfsblock: write_begin
Date: Mon, 13 Jun 2011 22:33:06 -0400	[thread overview]
Message-ID: <db22e7ddaae99181d46fa42233381595eaf243ec.1308017749.git.rees@umich.edu> (raw)
In-Reply-To: <cover.1308017749.git.rees@umich.edu>

From: Fred Isaman <iisaman@citi.umich.edu>

Implements bl_write_begin and bl_do_flush, allowing the block driver to read
in the page "around" the data that is about to be copied to the page.

[pnfsblock: fix 64-bit compiler warnings for write_begin]
[pnfsblock: write_begin adjust for removed fields]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 fs/nfs/blocklayout/blocklayout.c |  178 +++++++++++++++++++++++++++++++++++++-
 1 files changed, 177 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index d9bcb13..b9b961f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -31,6 +31,8 @@
  */
 #include <linux/module.h>
 #include <linux/init.h>
+
+#include <linux/buffer_head.h> /* various write calls */
 #include <linux/bio.h> /* struct bio */
 #include <linux/vmalloc.h>
 #include "blocklayout.h"
@@ -589,11 +591,185 @@ bl_clear_layoutdriver(struct nfs_server *server)
 	return 0;
 }
 
+/* STUB: meant to mark the intersection of layout and page as bad so that
+ * it is not used again.  For now it deliberately does nothing.
+ */
+static void
+mark_bad_read(void)
+{
+}
+
+/* Copied from buffer.c: set or clear bh's uptodate bit, then unlock it */
+static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+{
+	if (!uptodate) {
+		/* This is reached when a READA attempt has failed. */
+		clear_buffer_uptodate(bh);
+	} else {
+		set_buffer_uptodate(bh);
+	}
+	unlock_buffer(bh);
+}
+
+/* Copied from buffer.c; just forwards to __end_buffer_read_notouch() */
+static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
+{
+	__end_buffer_read_notouch(bh, uptodate);
+}
+
+/*
+ * map_block:  map a requested I/O block (isect) into an offset in the LVM
+ * meta block_device: marks bh mapped and fills in its b_bdev and b_blocknr
+ * from extent be
+ */
+static void
+map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
+{
+	dprintk("%s enter be=%p\n", __func__, be);
+
+	bh->b_bdev = be->be_mdev;
+	/* shift turns 512-byte sectors into blocks of 1 << i_blkbits bytes */
+	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
+		(be->be_mdev->bd_inode->i_blkbits - 9);
+	set_buffer_mapped(bh);
+
+	dprintk("%s isect %ld, bh->b_blocknr %ld, using bsize %Zd\n",
+		__func__, (long)isect, (long)bh->b_blocknr,
+		bh->b_size);
+}
+
+/* Given an unmapped page, zero it (or read in page for COW) and set the
+ * appropriate flags/markings; the range [from, to) may stay uninitialized.
+ * Returns 0 on success, -ENOMEM or -EIO on failure, or the error returned
+ * by mark_initialized_sectors().  Loosely based on nobh_write_begin.
+ */
+static int
+init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
+		    unsigned from, unsigned to, sector_t **pages_to_mark)
+{
+	struct buffer_head *bh;
+	int inval, ret = -EIO;
+	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
+	sector_t isect;
+
+	dprintk("%s enter, %p\n", __func__, page);
+	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
+	if (!bh) {
+		mark_bad_read();	/* cleanup would free a NULL bh */
+		return -ENOMEM;
+	}
+
+	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
+	be = find_get_extent(bl, isect, &cow_read);
+	if (!be)
+		goto cleanup;
+	inval = is_hole(be, isect);
+	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
+	if (inval) {
+		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
+			dprintk("%s PANIC - got NONE_DATA extent %p\n",
+				__func__, be);
+			goto cleanup;
+		}
+		map_block(isect, be, bh);
+		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+	}
+	if (PageUptodate(page)) {
+		/* Do nothing */
+	} else if (inval && !cow_read) {
+		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
+	} else if (0 < from || PAGE_CACHE_SIZE > to) {
+		struct pnfs_block_extent *read_extent;
+
+		read_extent = (inval && cow_read) ? cow_read : be;
+		map_block(isect, read_extent, bh);
+		lock_buffer(bh);
+		bh->b_end_io = end_buffer_read_nobh;
+		submit_bh(READ, bh);
+		dprintk("%s: Waiting for buffer read\n", __func__);
+		/* XXX Don't really want to hold layout lock here */
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh))
+			goto cleanup;
+	}
+	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+		/* There is a BUG here if is a short copy after write_begin,
+		 * but I think this is a generic fs bug.  The problem is that
+		 * we have marked the page as initialized, but it is possible
+		 * that the section not copied may never get copied.
+		 */
+		ret = mark_initialized_sectors(be->be_inval, isect,
+					       PAGE_CACHE_SECTORS,
+					       pages_to_mark);
+		/* Want to preallocate mem so above can't fail */
+		if (ret)
+			goto cleanup;
+	}
+	SetPageMappedToDisk(page);
+	ret = 0;
+
+cleanup:
+	free_buffer_head(bh);
+	put_extent(be);
+	put_extent(cow_read);
+	if (ret) {
+		/* Need to mark layout with bad read...should now
+		 * just use nfs4 for reads and writes.
+		 */
+		mark_bad_read();
+	}
+	return ret;
+}
+
 static int
 bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
 	       unsigned count, struct pnfs_fsdata *fsdata)
 {
-	return 0;
+	unsigned from, to;
+	int ret;
+	sector_t *pages_to_mark = NULL;
+	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
+
+	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
+	print_page(page);
+	/* The following code assumes blocksize >= PAGE_CACHE_SIZE */
+	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
+		dprintk("%s Can't handle blocksize %llu\n", __func__,
+			(u64)bl->bl_blocksize);
+		put_lseg(fsdata->lseg);
+		fsdata->lseg = NULL;
+		return 0;
+	}
+	if (PageMappedToDisk(page)) {
+		/* Basically, this is a flag that says we have
+		 * successfully called write_begin already on this page.
+		 */
+		/* NOTE - there are cache consistency issues here.
+		 * For example, what if the layout is recalled, then regained?
+		 * If the file is closed and reopened, will the page flags
+		 * be reset?  If not, we'll have to use layout info instead of
+		 * the page flag.
+		 */
+		return 0;
+	}
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + count;
+	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
+	if (ret) {
+		dprintk("%s init page failed with %i", __func__, ret);
+		/* Revert back to plain NFS and just continue on with
+		 * write.  This assumes there is no request attached, which
+		 * should be true if we get here.
+		 */
+		BUG_ON(PagePrivate(page));
+		put_lseg(fsdata->lseg);
+		fsdata->lseg = NULL;
+		kfree(pages_to_mark);
+		ret = 0;
+	} else {
+		fsdata->private = pages_to_mark;
+	}
+	return ret;
 }
 
 static int
-- 
1.7.4.1


  parent reply	other threads:[~2011-06-14  2:33 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-14  2:31 [PATCH 00/33] v2 block layout patches Jim Rees
2011-06-14  2:32 ` [PATCH 01/33] pnfs: GETDEVICELIST Jim Rees
2011-06-14  2:32 ` [PATCH 02/33] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-14  2:32 ` [PATCH 03/33] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-14  2:32 ` [PATCH 04/33] pnfs: hook nfs_write_begin/end to allow layout driver manipulation Jim Rees
2011-06-14  2:32 ` [PATCH 05/33] pnfs: ask for layout_blksize and save it in nfs_server Jim Rees
2011-06-14  2:32 ` [PATCH 06/33] pnfs: cleanup_layoutcommit Jim Rees
2011-06-14  2:32 ` [PATCH 07/33] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-14  2:32 ` [PATCH 08/33] pnfsblock: blocklayout stub Jim Rees
2011-06-14  2:32 ` [PATCH 09/33] pnfsblock: layout alloc and free Jim Rees
2011-06-14  2:32 ` [PATCH 10/33] pnfsblock: add support for simple rpc pipefs Jim Rees
2011-06-14 15:52   ` Benny Halevy
2011-06-14  2:32 ` [PATCH 11/33] pnfsblock: add block device discovery pipe Jim Rees
2011-06-14  2:32 ` [PATCH 12/33] pnfsblock: basic extent code Jim Rees
2011-06-14  2:32 ` [PATCH 13/33] pnfsblock: add device operations Jim Rees
2011-06-14  2:32 ` [PATCH 14/33] pnfsblock: remove " Jim Rees
2011-06-14  2:32 ` [PATCH 15/33] pnfsblock: lseg alloc and free Jim Rees
2011-06-14  2:32 ` [PATCH 16/33] pnfsblock: merge extents Jim Rees
2011-06-14  2:32 ` [PATCH 17/33] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-14  2:32 ` [PATCH 18/33] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-14  2:32 ` [PATCH 19/33] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-14  2:32 ` [PATCH 20/33] pnfsblock: find_get_extent Jim Rees
2011-06-14  2:32 ` [PATCH 21/33] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-14  2:32 ` [PATCH 22/33] pnfsblock: merge rw extents Jim Rees
2011-06-14  2:32 ` [PATCH 23/33] pnfsblock: encode_layoutcommit Jim Rees
2011-06-14  2:33 ` [PATCH 24/33] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-14  2:33 ` [PATCH 25/33] pnfsblock: bl_read_pagelist Jim Rees
2011-06-14  2:33 ` Jim Rees [this message]
2011-06-14  2:33 ` [PATCH 27/33] pnfsblock: write_end Jim Rees
2011-06-14  2:33 ` [PATCH 28/33] pnfsblock: write_end_cleanup Jim Rees
2011-06-14  2:33 ` [PATCH 29/33] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-14  2:33 ` [PATCH 30/33] pnfsblock: bl_write_pagelist Jim Rees
2011-06-14  2:33 ` [PATCH 31/33] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-14  2:33 ` [PATCH 32/33] pnfsblock: Implement release_inval_marks Jim Rees
2011-06-14  2:33 ` [PATCH 33/33] pnfsblock DEVONLY: Add configurable prefetch size for layoutget Jim Rees

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=db22e7ddaae99181d46fa42233381595eaf243ec.1308017749.git.rees@umich.edu \
    --to=rees@umich.edu \
    --cc=bhalevy@panasas.com \
    --cc=honey@citi.umich.edu \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.