All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <npiggin@suse.de>
To: Linux Filesystems <linux-fsdevel@vger.kernel.org>
Cc: Linux Kernel <linux-kernel@vger.kernel.org>,
	Nick Piggin <npiggin@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <akpm@linux-foundation.org>
Subject: [patch 3/3] ext2: use perform_write aop
Date: Thu,  8 Feb 2007 14:07:46 +0100 (CET)	[thread overview]
Message-ID: <20070208105508.26443.7806.sendpatchset@linux.site> (raw)
In-Reply-To: <20070208105437.26443.35653.sendpatchset@linux.site>

Convert ext2 to use ->perform_write. This uses the main loop out of
generic_perform_write, but when encountering a short usercopy, it
zeroes out new uninitialised blocks, and passes in a short-length commit
to __block_commit_write, which does the right thing (in terms of not
setting things uptodate).

 fs/buffer.c                 |  143 ++++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/inode.c             |    7 ++
 include/linux/buffer_head.h |    1 
 include/linux/pagemap.h     |    2 
 4 files changed, 153 insertions(+)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -1866,6 +1866,50 @@ next_bh:
 	return err;
 }
 
+void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+{
+	unsigned int block_start, block_end;
+	struct buffer_head *head, *bh;
+
+	BUG_ON(!PageLocked(page));
+	if (!page_has_buffers(page))
+		return;
+
+	bh = head = page_buffers(page);
+	block_start = 0;
+	do {
+		block_end = block_start + bh->b_size;
+
+		if (buffer_new(bh)) {
+			if (block_end > from && block_start < to) {
+				if (!PageUptodate(page)) {
+					unsigned start, end;
+					void *kaddr;
+
+					start = max(from, block_start);
+					end = min(to, block_end);
+					/* clear exactly [start, end) */
+					kaddr = kmap_atomic(page, KM_USER0);
+					memset(kaddr+start, 0, end-start);
+					flush_dcache_page(page);
+					kunmap_atomic(kaddr, KM_USER0);
+					set_buffer_uptodate(bh);
+				}
+
+				/*
+				 * XXX: make buffer_new behaviour more
+				 * consistent.
+				 * clear_buffer_new(bh);
+				 */
+				mark_buffer_dirty(bh);
+			}
+		}
+
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
+}
+
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
 {
@@ -1900,6 +1944,105 @@ static int __block_commit_write(struct i
 	return 0;
 }
 
+ssize_t block_perform_write(struct file *file, struct iovec_iterator *i,
+					loff_t pos, get_block_t *get_block)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	long status = 0;
+	ssize_t written = 0;
+
+	do {
+		struct page *page;
+		pgoff_t index;		/* Pagecache index for current page */
+		unsigned long offset;	/* Offset into pagecache page */
+		unsigned long bytes;	/* Bytes to write to page */
+		size_t copied;		/* Bytes copied from user */
+
+		offset = (pos & (PAGE_CACHE_SIZE - 1));
+		index = pos >> PAGE_CACHE_SHIFT;
+		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+						iovec_iterator_count(i));
+
+		/*
+		 * Bring in the user page that we will copy from _first_.
+		 * Otherwise there's a nasty deadlock on copying from the
+		 * same page as we're writing to, without it being marked
+		 * up-to-date.
+		 *
+		 * Not only is this an optimisation, but it is also required
+		 * to check that the address is actually valid, when atomic
+		 * usercopies are used, below.
+		 */
+		if (unlikely(iovec_iterator_fault_in_readable(i))) {
+			status = -EFAULT;
+			break;
+		}
+
+		page = __grab_cache_page(mapping, index);
+		if (!page) {
+			status = -ENOMEM;
+			break;
+		}
+
+		status = __block_prepare_write(inode, page, offset,
+						offset+bytes, get_block);
+		if (unlikely(status)) {
+			ClearPageUptodate(page);
+			unlock_page(page);	/* as on the success path */
+			page_cache_release(page);
+
+			/*
+			 * prepare_write() may have instantiated a few blocks
+			 * outside i_size.  Trim these off again. Don't need
+			 * i_size_read because we hold i_mutex.
+			 */
+			if (pos + bytes > inode->i_size)
+				vmtruncate(inode, inode->i_size);
+			break;
+		}
+
+		/*
+		 * Must not enter the pagefault handler here, because
+		 * we hold the page lock. See mm/filemap.c for more
+		 * details.
+		 */
+		pagefault_disable();
+		copied = iovec_iterator_copy_from_user_atomic(page, i,
+							offset, bytes);
+		pagefault_enable();
+		if (unlikely(copied < bytes))
+			page_zero_new_buffers(page, offset+copied, offset+bytes);
+		flush_dcache_page(page);
+
+		/* This could be a short (even 0-length) commit */
+		__block_commit_write(inode, page, offset, offset+copied);
+
+		unlock_page(page);
+		mark_page_accessed(page);
+		page_cache_release(page);
+
+		iovec_iterator_advance(i, copied);
+		pos += copied;
+		written += copied;
+
+		balance_dirty_pages_ratelimited(mapping);
+		cond_resched();
+
+	} while (iovec_iterator_count(i));
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 */
+	if (pos > inode->i_size) {
+		i_size_write(inode, pos);
+		mark_inode_dirty(inode);
+	}
+
+	return written ? written : status;
+}
+
 /*
  * Generic "read page" function for block devices that have the normal
  * get_block functionality. This is most of the block device filesystems.
Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c
+++ linux-2.6/fs/ext2/inode.c
@@ -642,6 +642,12 @@ ext2_readpages(struct file *file, struct
 	return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
 }
 
+static ssize_t
+ext2_perform_write(struct file *file, struct iovec_iterator *i, loff_t pos)
+{
+	return block_perform_write(file, i, pos, ext2_get_block);
+}
+
 static int
 ext2_prepare_write(struct file *file, struct page *page,
 			unsigned from, unsigned to)
@@ -689,6 +695,7 @@ const struct address_space_operations ex
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_writepage,
 	.sync_page		= block_sync_page,
+	.perform_write		= ext2_perform_write,
 	.prepare_write		= ext2_prepare_write,
 	.commit_write		= generic_commit_write,
 	.bmap			= ext2_bmap,
Index: linux-2.6/include/linux/buffer_head.h
===================================================================
--- linux-2.6.orig/include/linux/buffer_head.h
+++ linux-2.6/include/linux/buffer_head.h
@@ -198,6 +198,7 @@ void block_invalidatepage(struct page *p
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
 int block_read_full_page(struct page*, get_block_t*);
+ssize_t block_perform_write(struct file *, struct iovec_iterator*, loff_t, get_block_t*);
 int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
 int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
 				loff_t *);
Index: linux-2.6/include/linux/pagemap.h
===================================================================
--- linux-2.6.orig/include/linux/pagemap.h
+++ linux-2.6/include/linux/pagemap.h
@@ -87,6 +87,8 @@ unsigned find_get_pages_contig(struct ad
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages);
 
+struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index);
+
 /*
  * Returns locked page at given index in given cache, creating it if needed.
  */

  parent reply	other threads:[~2007-02-08 13:08 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-02-08 13:07 [rfc][patch 0/3] a faster buffered write deadlock fix? Nick Piggin
2007-02-08 13:07 ` [patch 1/3] fs: add an iovec iterator Nick Piggin
2007-02-08 19:49   ` Christoph Hellwig
2007-02-09  1:46     ` Nick Piggin
2007-02-09  2:03       ` Nate Diller
2007-02-09  3:31         ` Nick Piggin
2007-02-09 17:28           ` Zach Brown
2007-03-09 10:40         ` Christoph Hellwig
2007-02-08 23:04   ` Mark Fasheh
2007-02-08 13:07 ` [patch 2/3] fs: introduce perform_write aop Nick Piggin
2007-03-09 10:39   ` Christoph Hellwig
2007-03-09 12:52     ` Nick Piggin
2007-03-09 22:01       ` Anton Altaparmakov
2007-03-09 23:33     ` Mark Fasheh
2007-03-10  9:25       ` Christoph Hellwig
2007-03-12  2:13         ` Mark Fasheh
2007-03-14 13:30         ` Nick Piggin
2007-03-14 15:17           ` Christoph Hellwig
2007-02-08 13:07 ` Nick Piggin [this message]
2007-02-08 14:47   ` [patch 3/3] ext2: use " Dmitriy Monakhov
2007-02-09 19:14   ` Andrew Morton
2007-02-09 19:45     ` Andrew Morton
2007-02-10  1:34       ` Nick Piggin
2007-02-10  1:50         ` Andrew Morton
2007-02-09  0:38 ` [rfc][patch 0/3] a faster buffered write deadlock fix? Mark Fasheh
2007-02-09  2:04   ` Nick Piggin
2007-02-09  8:41 ` Andrew Morton
2007-02-09  9:54   ` Nick Piggin
2007-02-09 10:09     ` Andrew Morton
2007-02-09 10:32       ` Nick Piggin
2007-02-09 10:52         ` Andrew Morton
2007-02-09 11:31           ` Nick Piggin
2007-02-09 11:46             ` Andrew Morton
2007-02-09 12:11               ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070208105508.26443.7806.sendpatchset@linux.site \
    --to=npiggin@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.