All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/12] VM: Split out the accounting of unstable writes from BDI_RECLAIMABLE
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

From: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>

Signed-off-by: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Acked-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
Reviewed-by: Wu Fengguang <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---

 fs/nfs/write.c              |    6 +++---
 include/linux/backing-dev.h |    3 ++-
 mm/backing-dev.c            |    6 ++++--
 mm/filemap.c                |    2 +-
 mm/page-writeback.c         |   16 ++++++++++------
 mm/truncate.c               |    2 +-
 6 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696..7ba56f8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -440,7 +440,7 @@ nfs_mark_request_commit(struct nfs_page *req)
 			NFS_PAGE_TAG_COMMIT);
 	spin_unlock(&inode->i_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_UNSTABLE);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
 
@@ -451,7 +451,7 @@ nfs_clear_request_commit(struct nfs_page *req)
 
 	if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
 		dec_zone_page_state(page, NR_UNSTABLE_NFS);
-		dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+		dec_bdi_stat(page->mapping->backing_dev_info, BDI_UNSTABLE);
 		return 1;
 	}
 	return 0;
@@ -1322,7 +1322,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		nfs_mark_request_commit(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
-				BDI_RECLAIMABLE);
+				BDI_UNSTABLE);
 		nfs_clear_page_tag_locked(req);
 	}
 	return -ENOMEM;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index fcbc26a..42c3e2a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -36,7 +36,8 @@ enum bdi_state {
 typedef int (congested_fn)(void *, int);
 
 enum bdi_stat_item {
-	BDI_RECLAIMABLE,
+	BDI_DIRTY,
+	BDI_UNSTABLE,
 	BDI_WRITEBACK,
 	NR_BDI_STAT_ITEMS
 };
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..88f3655 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -88,7 +88,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 	seq_printf(m,
 		   "BdiWriteback:     %8lu kB\n"
-		   "BdiReclaimable:   %8lu kB\n"
+		   "BdiDirty:         %8lu kB\n"
+		   "BdiUnstable:      %8lu kB\n"
 		   "BdiDirtyThresh:   %8lu kB\n"
 		   "DirtyThresh:      %8lu kB\n"
 		   "BackgroundThresh: %8lu kB\n"
@@ -102,7 +103,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		   "wb_list:          %8u\n"
 		   "wb_cnt:           %8u\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
-		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
+		   (unsigned long) K(bdi_stat(bdi, BDI_DIRTY)),
+		   (unsigned long) K(bdi_stat(bdi, BDI_UNSTABLE)),
 		   K(bdi_thresh), K(dirty_thresh),
 		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
 		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0..458387d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -136,7 +136,7 @@ void __remove_from_page_cache(struct page *page)
 	 */
 	if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
 		dec_zone_page_state(page, NR_FILE_DIRTY);
-		dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+		dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
 	}
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b19943..23d3fc6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -272,7 +272,8 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
 	else
 		avail_dirty = 0;
 
-	avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
+	avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
+		bdi_stat(bdi, BDI_UNSTABLE) +
 		bdi_stat(bdi, BDI_WRITEBACK);
 
 	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
@@ -509,7 +510,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 					global_page_state(NR_UNSTABLE_NFS);
 		nr_writeback = global_page_state(NR_WRITEBACK);
 
-		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+		bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+				     bdi_stat(bdi, BDI_UNSTABLE);
 		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 
 		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -554,10 +556,12 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * deltas.
 		 */
 		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
-			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
+			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+					     bdi_stat_sum(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
 		} else if (bdi_nr_reclaimable) {
-			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+			bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+					     bdi_stat(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 		}
 
@@ -1079,7 +1083,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 {
 	if (mapping_cap_account_dirty(mapping)) {
 		__inc_zone_page_state(page, NR_FILE_DIRTY);
-		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
 		task_dirty_inc(current);
 		task_io_account_write(PAGE_CACHE_SIZE);
 	}
@@ -1255,7 +1259,7 @@ int clear_page_dirty_for_io(struct page *page)
 		if (TestClearPageDirty(page)) {
 			dec_zone_page_state(page, NR_FILE_DIRTY);
 			dec_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
+					BDI_DIRTY);
 			return 1;
 		}
 		return 0;
diff --git a/mm/truncate.c b/mm/truncate.c
index e87e372..2466e0c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -75,7 +75,7 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
 		if (mapping && mapping_cap_account_dirty(mapping)) {
 			dec_zone_page_state(page, NR_FILE_DIRTY);
 			dec_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
+					BDI_DIRTY);
 			if (account_size)
 				task_io_account_cancelled_write(account_size);
 		}

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 04/12] NFS: Reduce the number of unnecessary COMMIT calls
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

If the caller is doing a non-blocking flush, and there are still writebacks
pending on the wire, we can usually defer the COMMIT call until those
writes are done.

Also ensure that we honour the wbc->nonblocking flag.

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/write.c |   17 ++++++++++++++---
 1 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e2e7464..0bc1d5b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1409,12 +1409,23 @@ static int nfs_commit_inode(struct inode *inode, int how)
 
 static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
 {
-	int ret;
+	int flags = FLUSH_SYNC;
+	int ret = 0;
 
-	ret = nfs_commit_inode(inode,
-			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+	/* Don't commit yet if this is a non-blocking flush and there are
+	 * outstanding writes for this mapping.
+	 */
+	if (wbc->sync_mode != WB_SYNC_ALL &&
+	    radix_tree_tagged(&NFS_I(inode)->nfs_page_tree,
+		    NFS_PAGE_TAG_LOCKED))
+		goto out_mark_dirty;
+
+	if (wbc->nonblocking)
+		flags = 0;
+	ret = nfs_commit_inode(inode, flags);
 	if (ret >= 0)
 		return 0;
+out_mark_dirty:
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	return ret;
 }

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 05/12] VM/NFS: The VM must tell the filesystem when to free reclaimable pages
  2010-01-25 22:15 [PATCH 00/12] Re: [PATCH] improve the performance of large sequential write NFS workloads Trond Myklebust
@ 2010-01-25 22:15 ` Trond Myklebust
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
  2010-01-25 22:15 ` [PATCH 11/12] NFS: Clean up nfs_sync_mapping Trond Myklebust
  2 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

balance_dirty_pages() should really tell the filesystem whether or not it
has an excess of actual dirty pages, or whether it would be more useful to
start freeing up the unstable writes.

Assume that if the number of unstable writes is more than 1/2 the number of
reclaimable pages, then we should force NFS to free up the former.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <peterz@infradead.org>
---

 fs/nfs/write.c            |    2 +-
 include/linux/writeback.h |    5 +++++
 mm/page-writeback.c       |   12 ++++++++++--
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0bc1d5b..9feafab 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1415,7 +1415,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
 	/* Don't commit yet if this is a non-blocking flush and there are
 	 * outstanding writes for this mapping.
 	 */
-	if (wbc->sync_mode != WB_SYNC_ALL &&
+	if (!wbc->force_commit_unstable && wbc->sync_mode != WB_SYNC_ALL &&
 	    radix_tree_tagged(&NFS_I(inode)->nfs_page_tree,
 		    NFS_PAGE_TAG_LOCKED))
 		goto out_mark_dirty;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 76e8903..8229139 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -62,6 +62,11 @@ struct writeback_control {
 	 * so we use a single control to update them
 	 */
 	unsigned no_nrwrite_index_update:1;
+	/*
+	 * The following is used by balance_dirty_pages() to
+	 * force NFS to commit unstable pages.
+	 */
+	unsigned force_commit_unstable:1;
 };
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c06739b..6a0aec7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -503,6 +503,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 			.nr_to_write	= write_chunk,
 			.range_cyclic	= 1,
 		};
+		long bdi_nr_unstable = 0;
 
 		get_dirty_limits(&background_thresh, &dirty_thresh,
 				&bdi_thresh, bdi);
@@ -512,8 +513,10 @@ static void balance_dirty_pages(struct address_space *mapping,
 		nr_writeback = global_page_state(NR_WRITEBACK);
 
 		bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
-		if (bdi_cap_account_unstable(bdi))
-			bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
+		if (bdi_cap_account_unstable(bdi)) {
+			bdi_nr_unstable = bdi_stat(bdi, BDI_UNSTABLE);
+			bdi_nr_reclaimable += bdi_nr_unstable;
+		}
 		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 
 		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -541,6 +544,11 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * up.
 		 */
 		if (bdi_nr_reclaimable > bdi_thresh) {
+			wbc.force_commit_unstable = 0;
+			/* Force NFS to also free up unstable writes. */
+			if (bdi_nr_unstable > bdi_nr_reclaimable / 2)
+				wbc.force_commit_unstable = 1;
+
 			writeback_inodes_wbc(&wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 08/12] NFS: Simplify nfs_wb_page_cancel()
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

In all cases we should be able to just remove the request and call
cancel_dirty_page().

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/write.c         |   39 +--------------------------------------
 include/linux/nfs_fs.h |    2 --
 2 files changed, 1 insertions(+), 40 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2d78f08..53df027 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -539,19 +539,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
 	return res;
 }
 
-static void nfs_cancel_commit_list(struct list_head *head)
-{
-	struct nfs_page *req;
-
-	while(!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_clear_request_commit(req);
-		nfs_inode_remove_request(req);
-		nfs_unlock_request(req);
-	}
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
@@ -1484,13 +1471,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
-		if (how & FLUSH_INVALIDATE) {
-			spin_unlock(&inode->i_lock);
-			nfs_cancel_commit_list(&head);
-			ret = pages;
-			spin_lock(&inode->i_lock);
-			continue;
-		}
 		pages += nfs_scan_commit(inode, &head, 0, 0);
 		spin_unlock(&inode->i_lock);
 		ret = nfs_commit_list(inode, &head, how);
@@ -1547,26 +1527,13 @@ int nfs_wb_nocommit(struct inode *inode)
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 {
 	struct nfs_page *req;
-	loff_t range_start = page_offset(page);
-	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
-	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = range_start,
-		.range_end = range_end,
-	};
 	int ret = 0;
 
 	BUG_ON(!PageLocked(page));
 	for (;;) {
 		req = nfs_page_find_request(page);
 		if (req == NULL)
-			goto out;
-		if (test_bit(PG_CLEAN, &req->wb_flags)) {
-			nfs_release_request(req);
 			break;
-		}
 		if (nfs_lock_request_dontget(req)) {
 			nfs_inode_remove_request(req);
 			/*
@@ -1579,12 +1546,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 		}
 		ret = nfs_wait_on_request(req);
 		if (ret < 0)
-			goto out;
+			break;
 	}
-	if (!PagePrivate(page))
-		return 0;
-	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
-out:
 	return ret;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 384ea3e..1eec414 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -34,8 +34,6 @@
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
 #define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
-#define FLUSH_INVALIDATE	64	/* Invalidate the page cache */
-#define FLUSH_NOWRITEPAGE	128	/* Don't call writepage() */
 
 #ifdef __KERNEL__
 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 02/12] VM: Don't call bdi_stat(BDI_UNSTABLE) on non-nfs backing-devices
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

Speeds up the accounting in balance_dirty_pages() for non-nfs devices.

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Acked-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
Acked-by: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Reviewed-by: Wu Fengguang <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---

 fs/nfs/client.c             |    1 +
 include/linux/backing-dev.h |    6 ++++++
 mm/page-writeback.c         |   16 +++++++++++-----
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee77713..d0b060a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -890,6 +890,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
 
 	server->backing_dev_info.name = "nfs";
 	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+	server->backing_dev_info.capabilities |= BDI_CAP_ACCT_UNSTABLE;
 
 	if (server->wsize > max_rpc_payload)
 		server->wsize = max_rpc_payload;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 42c3e2a..8b45166 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -232,6 +232,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #define BDI_CAP_EXEC_MAP	0x00000040
 #define BDI_CAP_NO_ACCT_WB	0x00000080
 #define BDI_CAP_SWAP_BACKED	0x00000100
+#define BDI_CAP_ACCT_UNSTABLE	0x00000200
 
 #define BDI_CAP_VMFLAGS \
 	(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -311,6 +312,11 @@ static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
 	return bdi == &default_backing_dev_info;
 }
 
+static inline bool bdi_cap_account_unstable(struct backing_dev_info *bdi)
+{
+	return bdi->capabilities & BDI_CAP_ACCT_UNSTABLE;
+}
+
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 23d3fc6..c06739b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -273,8 +273,9 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
 		avail_dirty = 0;
 
 	avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
-		bdi_stat(bdi, BDI_UNSTABLE) +
 		bdi_stat(bdi, BDI_WRITEBACK);
+	if (bdi_cap_account_unstable(bdi))
+		avail_dirty += bdi_stat(bdi, BDI_UNSTABLE);
 
 	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
 }
@@ -510,8 +511,9 @@ static void balance_dirty_pages(struct address_space *mapping,
 					global_page_state(NR_UNSTABLE_NFS);
 		nr_writeback = global_page_state(NR_WRITEBACK);
 
-		bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
-				     bdi_stat(bdi, BDI_UNSTABLE);
+		bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+		if (bdi_cap_account_unstable(bdi))
+			bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
 		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 
 		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -556,11 +558,15 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * deltas.
 		 */
 		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
-			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY);
+			if (bdi_cap_account_unstable(bdi))
+				bdi_nr_reclaimable +=
 					     bdi_stat_sum(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
 		} else if (bdi_nr_reclaimable) {
-			bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+			bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+			if (bdi_cap_account_unstable(bdi))
+				bdi_nr_reclaimable +=
 					     bdi_stat(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 		}

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

Now that we have correct COMMIT semantics in writeback_single_inode...

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/write.c         |   39 ++++++---------------------------------
 include/linux/nfs_fs.h |    1 -
 2 files changed, 6 insertions(+), 34 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..747b40e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	pgoff_t idx_start, idx_end;
 	unsigned int npages = 0;
 	LIST_HEAD(head);
-	int nocommit = how & FLUSH_NOCOMMIT;
 	long pages, ret;
 
 	/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 				npages = 0;
 		}
 	}
-	how &= ~FLUSH_NOCOMMIT;
 	spin_lock(&inode->i_lock);
 	do {
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
 			continue;
-		if (nocommit)
-			break;
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
@@ -1481,47 +1477,24 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	return ret;
 }
 
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	int ret;
-
-	ret = nfs_writepages(mapping, wbc);
-	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, wbc, how);
-	if (ret < 0)
-		goto out;
-	return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
 {
 	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
 		.range_start = 0,
 		.range_end = LLONG_MAX,
 	};
 
-	return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, 0);
+	return sync_inode(inode, &wbc);
 }
 
 int nfs_wb_nocommit(struct inode *inode)
 {
-	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+	return filemap_write_and_wait(inode->i_mapping);
 }
 
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..c536caf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
 
 #ifdef __KERNEL__
 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 03/12] NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

The sole purpose of nfs_write_inode is to commit unstable writes, so
move it into fs/nfs/write.c, and make nfs_commit_inode static.

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/inode.c         |   12 ------------
 fs/nfs/write.c         |   24 +++++++++++++++++++++++-
 include/linux/nfs_fs.h |    7 -------
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 302aa89..8341709 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid)
 	return ino;
 }
 
-int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	int ret;
-
-	ret = nfs_commit_inode(inode,
-			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
-	if (ret >= 0)
-		return 0;
-	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-	return ret;
-}
-
 void nfs_clear_inode(struct inode *inode)
 {
 	/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7ba56f8..e2e7464 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_release = nfs_commit_release,
 };
 
-int nfs_commit_inode(struct inode *inode, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
 	int res;
@@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how)
 	}
 	return res;
 }
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+	int ret;
+
+	ret = nfs_commit_inode(inode,
+			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+	if (ret >= 0)
+		return 0;
+	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	return ret;
+}
 #else
 static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
 	return 0;
 }
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+	return 0;
+}
 #endif
 
+int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return nfs_commit_unstable_pages(inode, wbc);
+}
+
 long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
 	struct inode *inode = mapping->host;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d09db1b..384ea3e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_write_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_write_data *wdata);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
-	return 0;
-}
 #endif
 
 static inline int

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 06/12] NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Acked-by: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Acked-by: Wu Fengguang <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---

 fs/nfs/write.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9feafab..b0b3890 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1420,7 +1420,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
 		    NFS_PAGE_TAG_LOCKED))
 		goto out_mark_dirty;
 
-	if (wbc->nonblocking)
+	if (wbc->nonblocking || wbc->for_background)
 		flags = 0;
 	ret = nfs_commit_inode(inode, flags);
 	if (ret >= 0)

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 00/12] Re: [PATCH] improve the performance of large sequential write NFS workloads
@ 2010-01-25 22:15 Trond Myklebust
  2010-01-25 22:15 ` [PATCH 05/12] VM/NFS: The VM must tell the filesystem when to free reclaimable pages Trond Myklebust
                   ` (2 more replies)
  0 siblings, 3 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

This patch series is a follow up to the series that was posted on Jan
8th. Following the discussion with Christoph and Al, I've rebased the
patch series on top of Al's 'write_inode' branch (see
http://git.kernel.org/?p=linux/kernel/git/viro/vfs-2.6.git;a=summary).

The main changes compared to the previous iteration of these patches are
as follows:
  - Rewrite to take advantage of Christoph's improvements to the
    s_op->write_inode() callback, and writeback_single_inode().

  - Added a bunch of cleanups to simplify the NFS writeback code by
    using sync_inode() where possible.

  - Appended a tentative fix for the mmap() lockdep issue.

If you would like to test it out, please feel free to pull the
"performance-for-next" branch from
        git://git.linux-nfs.org/projects/trondmy/nfs-2.6.git

I hope to start submitting this into the linux-next tree within a few
days unless there are objections.

Cheers
  Trond

---

Peter Zijlstra (1):
      VM: Split out the accounting of unstable writes from BDI_RECLAIMABLE

Trond Myklebust (11):
      NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping
      NFS: Clean up nfs_sync_mapping
      NFS: Simplify nfs_wb_page()
      NFS: Replace __nfs_write_mapping with sync_inode()
      NFS: Simplify nfs_wb_page_cancel()
      NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages
      NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set
      VM/NFS: The VM must tell the filesystem when to free reclaimable pages
      NFS: Reduce the number of unnecessary COMMIT calls
      NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c
      VM: Don't call bdi_stat(BDI_UNSTABLE) on non-nfs backing-devices


 fs/nfs/client.c             |    1 
 fs/nfs/dir.c                |    2 
 fs/nfs/inode.c              |   67 +-----------
 fs/nfs/symlink.c            |    2 
 fs/nfs/write.c              |  233 ++++++++++++-------------------------------
 include/linux/backing-dev.h |    9 +-
 include/linux/nfs_fs.h      |   12 --
 include/linux/writeback.h   |    5 +
 mm/backing-dev.c            |    6 +
 mm/filemap.c                |    2 
 mm/page-writeback.c         |   30 ++++--
 mm/truncate.c               |    2 
 12 files changed, 118 insertions(+), 253 deletions(-)

-- 
Signature

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH 10/12] NFS: Simplify nfs_wb_page()
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/write.c         |  118 +++++++++---------------------------------------
 include/linux/nfs_fs.h |    1 
 2 files changed, 22 insertions(+), 97 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 747b40e..830613d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -501,44 +501,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
 }
 #endif
 
-/*
- * Wait for a request to complete.
- *
- * Interruptible by fatal signals only.
- */
-static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_page *req;
-	pgoff_t idx_end, next;
-	unsigned int		res = 0;
-	int			error;
-
-	if (npages == 0)
-		idx_end = ~0;
-	else
-		idx_end = idx_start + npages - 1;
-
-	next = idx_start;
-	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
-		if (req->wb_index > idx_end)
-			break;
-
-		next = req->wb_index + 1;
-		BUG_ON(!NFS_WBACK_BUSY(req));
-
-		kref_get(&req->wb_kref);
-		spin_unlock(&inode->i_lock);
-		error = nfs_wait_on_request(req);
-		nfs_release_request(req);
-		spin_lock(&inode->i_lock);
-		if (error < 0)
-			return error;
-		res++;
-	}
-	return res;
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
@@ -1437,46 +1399,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return nfs_commit_unstable_pages(inode, wbc);
 }
 
-long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	struct inode *inode = mapping->host;
-	pgoff_t idx_start, idx_end;
-	unsigned int npages = 0;
-	LIST_HEAD(head);
-	long pages, ret;
-
-	/* FIXME */
-	if (wbc->range_cyclic)
-		idx_start = 0;
-	else {
-		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (idx_end > idx_start) {
-			pgoff_t l_npages = 1 + idx_end - idx_start;
-			npages = l_npages;
-			if (sizeof(npages) != sizeof(l_npages) &&
-					(pgoff_t)npages != l_npages)
-				npages = 0;
-		}
-	}
-	spin_lock(&inode->i_lock);
-	do {
-		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
-		if (ret != 0)
-			continue;
-		pages = nfs_scan_commit(inode, &head, idx_start, npages);
-		if (pages == 0)
-			break;
-		pages += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&inode->i_lock);
-		ret = nfs_commit_list(inode, &head, how);
-		spin_lock(&inode->i_lock);
-
-	} while (ret >= 0);
-	spin_unlock(&inode->i_lock);
-	return ret;
-}
-
 /*
  * flush the inode to disk.
  */
@@ -1524,45 +1446,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 	return ret;
 }
 
-static int nfs_wb_page_priority(struct inode *inode, struct page *page,
-				int how)
+/*
+ * Write back all requests on one page - we do this before reading it.
+ */
+int nfs_wb_page(struct inode *inode, struct page *page)
 {
 	loff_t range_start = page_offset(page);
 	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
 	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
+		.nr_to_write = 0,
 		.range_start = range_start,
 		.range_end = range_end,
 	};
+	struct nfs_page *req;
+	int need_commit;
 	int ret;
 
-	do {
+	while(PagePrivate(page)) {
 		if (clear_page_dirty_for_io(page)) {
 			ret = nfs_writepage_locked(page, &wbc);
 			if (ret < 0)
 				goto out_error;
-		} else if (!PagePrivate(page))
+		}
+		req = nfs_find_and_lock_request(page);
+		if (!req)
 			break;
-		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
-		if (ret < 0)
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
 			goto out_error;
-	} while (PagePrivate(page));
+		}
+		need_commit = test_bit(PG_CLEAN, &req->wb_flags);
+		nfs_clear_page_tag_locked(req);
+		if (need_commit) {
+			ret = nfs_commit_inode(inode, FLUSH_SYNC);
+			if (ret < 0)
+				goto out_error;
+		}
+	}
 	return 0;
 out_error:
-	__mark_inode_dirty(inode, I_DIRTY_PAGES);
 	return ret;
 }
 
-/*
- * Write back all requests on one page - we do this before reading it.
- */
-int nfs_wb_page(struct inode *inode, struct page* page)
-{
-	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
-}
-
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 		struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c536caf..6ee74c5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -474,7 +474,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
  * Try to write back everything synchronously (but check the
  * return value!)
  */
-extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
 extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 07/12] NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

Since nfs_scan_list() doesn't wait for locked pages, we have a race in
which it is possible to end up with an inode that needs to send a COMMIT,
but which does not have the I_DIRTY_DATASYNC flag set.

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/write.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b0b3890..2d78f08 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -573,11 +573,15 @@ static int
 nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
+	int ret;
 
 	if (!nfs_need_commit(nfsi))
 		return 0;
 
-	return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	if (nfs_need_commit(NFS_I(inode)))
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	return ret;
 }
 #else
 static inline int nfs_need_commit(struct nfs_inode *nfsi)

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 01/12] VM: Split out the accounting of unstable writes from BDI_RECLAIMABLE
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

From: Peter Zijlstra <peterz@infradead.org>

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
---

 fs/nfs/write.c              |    6 +++---
 include/linux/backing-dev.h |    3 ++-
 mm/backing-dev.c            |    6 ++++--
 mm/filemap.c                |    2 +-
 mm/page-writeback.c         |   16 ++++++++++------
 mm/truncate.c               |    2 +-
 6 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696..7ba56f8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -440,7 +440,7 @@ nfs_mark_request_commit(struct nfs_page *req)
 			NFS_PAGE_TAG_COMMIT);
 	spin_unlock(&inode->i_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_UNSTABLE);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
 
@@ -451,7 +451,7 @@ nfs_clear_request_commit(struct nfs_page *req)
 
 	if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
 		dec_zone_page_state(page, NR_UNSTABLE_NFS);
-		dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+		dec_bdi_stat(page->mapping->backing_dev_info, BDI_UNSTABLE);
 		return 1;
 	}
 	return 0;
@@ -1322,7 +1322,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		nfs_mark_request_commit(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
-				BDI_RECLAIMABLE);
+				BDI_UNSTABLE);
 		nfs_clear_page_tag_locked(req);
 	}
 	return -ENOMEM;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index fcbc26a..42c3e2a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -36,7 +36,8 @@ enum bdi_state {
 typedef int (congested_fn)(void *, int);
 
 enum bdi_stat_item {
-	BDI_RECLAIMABLE,
+	BDI_DIRTY,
+	BDI_UNSTABLE,
 	BDI_WRITEBACK,
 	NR_BDI_STAT_ITEMS
 };
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..88f3655 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -88,7 +88,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 	seq_printf(m,
 		   "BdiWriteback:     %8lu kB\n"
-		   "BdiReclaimable:   %8lu kB\n"
+		   "BdiDirty:         %8lu kB\n"
+		   "BdiUnstable:      %8lu kB\n"
 		   "BdiDirtyThresh:   %8lu kB\n"
 		   "DirtyThresh:      %8lu kB\n"
 		   "BackgroundThresh: %8lu kB\n"
@@ -102,7 +103,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		   "wb_list:          %8u\n"
 		   "wb_cnt:           %8u\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
-		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
+		   (unsigned long) K(bdi_stat(bdi, BDI_DIRTY)),
+		   (unsigned long) K(bdi_stat(bdi, BDI_UNSTABLE)),
 		   K(bdi_thresh), K(dirty_thresh),
 		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
 		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0..458387d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -136,7 +136,7 @@ void __remove_from_page_cache(struct page *page)
 	 */
 	if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
 		dec_zone_page_state(page, NR_FILE_DIRTY);
-		dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+		dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
 	}
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b19943..23d3fc6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -272,7 +272,8 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
 	else
 		avail_dirty = 0;
 
-	avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
+	avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
+		bdi_stat(bdi, BDI_UNSTABLE) +
 		bdi_stat(bdi, BDI_WRITEBACK);
 
 	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
@@ -509,7 +510,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 					global_page_state(NR_UNSTABLE_NFS);
 		nr_writeback = global_page_state(NR_WRITEBACK);
 
-		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+		bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+				     bdi_stat(bdi, BDI_UNSTABLE);
 		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 
 		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -554,10 +556,12 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * deltas.
 		 */
 		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
-			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
+			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+					     bdi_stat_sum(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
 		} else if (bdi_nr_reclaimable) {
-			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+			bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+					     bdi_stat(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 		}
 
@@ -1079,7 +1083,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 {
 	if (mapping_cap_account_dirty(mapping)) {
 		__inc_zone_page_state(page, NR_FILE_DIRTY);
-		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
 		task_dirty_inc(current);
 		task_io_account_write(PAGE_CACHE_SIZE);
 	}
@@ -1255,7 +1259,7 @@ int clear_page_dirty_for_io(struct page *page)
 		if (TestClearPageDirty(page)) {
 			dec_zone_page_state(page, NR_FILE_DIRTY);
 			dec_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
+					BDI_DIRTY);
 			return 1;
 		}
 		return 0;
diff --git a/mm/truncate.c b/mm/truncate.c
index e87e372..2466e0c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -75,7 +75,7 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
 		if (mapping && mapping_cap_account_dirty(mapping)) {
 			dec_zone_page_state(page, NR_FILE_DIRTY);
 			dec_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
+					BDI_DIRTY);
 			if (account_size)
 				task_io_account_cancelled_write(account_size);
 		}


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 04/12] NFS: Reduce the number of unnecessary COMMIT calls
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

If the caller is doing a non-blocking flush, and there are still writebacks
pending on the wire, we can usually defer the COMMIT call until those
writes are done.

Also ensure that we honour the wbc->nonblocking flag.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/write.c |   17 ++++++++++++++---
 1 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e2e7464..0bc1d5b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1409,12 +1409,23 @@ static int nfs_commit_inode(struct inode *inode, int how)
 
 static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
 {
-	int ret;
+	int flags = FLUSH_SYNC;
+	int ret = 0;
 
-	ret = nfs_commit_inode(inode,
-			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+	/* Don't commit yet if this is a non-blocking flush and there are
+	 * outstanding writes for this mapping.
+	 */
+	if (wbc->sync_mode != WB_SYNC_ALL &&
+	    radix_tree_tagged(&NFS_I(inode)->nfs_page_tree,
+		    NFS_PAGE_TAG_LOCKED))
+		goto out_mark_dirty;
+
+	if (wbc->nonblocking)
+		flags = 0;
+	ret = nfs_commit_inode(inode, flags);
 	if (ret >= 0)
 		return 0;
+out_mark_dirty:
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	return ret;
 }


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 08/12] NFS: Simplify nfs_wb_page_cancel()
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

In all cases we should be able to just remove the request and call
cancel_dirty_page().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/write.c         |   39 +--------------------------------------
 include/linux/nfs_fs.h |    2 --
 2 files changed, 1 insertions(+), 40 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2d78f08..53df027 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -539,19 +539,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
 	return res;
 }
 
-static void nfs_cancel_commit_list(struct list_head *head)
-{
-	struct nfs_page *req;
-
-	while(!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_clear_request_commit(req);
-		nfs_inode_remove_request(req);
-		nfs_unlock_request(req);
-	}
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
@@ -1484,13 +1471,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
-		if (how & FLUSH_INVALIDATE) {
-			spin_unlock(&inode->i_lock);
-			nfs_cancel_commit_list(&head);
-			ret = pages;
-			spin_lock(&inode->i_lock);
-			continue;
-		}
 		pages += nfs_scan_commit(inode, &head, 0, 0);
 		spin_unlock(&inode->i_lock);
 		ret = nfs_commit_list(inode, &head, how);
@@ -1547,26 +1527,13 @@ int nfs_wb_nocommit(struct inode *inode)
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 {
 	struct nfs_page *req;
-	loff_t range_start = page_offset(page);
-	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
-	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = range_start,
-		.range_end = range_end,
-	};
 	int ret = 0;
 
 	BUG_ON(!PageLocked(page));
 	for (;;) {
 		req = nfs_page_find_request(page);
 		if (req == NULL)
-			goto out;
-		if (test_bit(PG_CLEAN, &req->wb_flags)) {
-			nfs_release_request(req);
 			break;
-		}
 		if (nfs_lock_request_dontget(req)) {
 			nfs_inode_remove_request(req);
 			/*
@@ -1579,12 +1546,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 		}
 		ret = nfs_wait_on_request(req);
 		if (ret < 0)
-			goto out;
+			break;
 	}
-	if (!PagePrivate(page))
-		return 0;
-	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
-out:
 	return ret;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 384ea3e..1eec414 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -34,8 +34,6 @@
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
 #define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
-#define FLUSH_INVALIDATE	64	/* Invalidate the page cache */
-#define FLUSH_NOWRITEPAGE	128	/* Don't call writepage() */
 
 #ifdef __KERNEL__
 


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 02/12] VM: Don't call bdi_stat(BDI_UNSTABLE) on non-nfs backing-devices
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

Speeds up the accounting in balance_dirty_pages() for non-nfs devices.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
---

 fs/nfs/client.c             |    1 +
 include/linux/backing-dev.h |    6 ++++++
 mm/page-writeback.c         |   16 +++++++++++-----
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee77713..d0b060a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -890,6 +890,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
 
 	server->backing_dev_info.name = "nfs";
 	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+	server->backing_dev_info.capabilities |= BDI_CAP_ACCT_UNSTABLE;
 
 	if (server->wsize > max_rpc_payload)
 		server->wsize = max_rpc_payload;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 42c3e2a..8b45166 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -232,6 +232,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #define BDI_CAP_EXEC_MAP	0x00000040
 #define BDI_CAP_NO_ACCT_WB	0x00000080
 #define BDI_CAP_SWAP_BACKED	0x00000100
+#define BDI_CAP_ACCT_UNSTABLE	0x00000200
 
 #define BDI_CAP_VMFLAGS \
 	(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -311,6 +312,11 @@ static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
 	return bdi == &default_backing_dev_info;
 }
 
+static inline bool bdi_cap_account_unstable(struct backing_dev_info *bdi)
+{
+	return bdi->capabilities & BDI_CAP_ACCT_UNSTABLE;
+}
+
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 23d3fc6..c06739b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -273,8 +273,9 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
 		avail_dirty = 0;
 
 	avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
-		bdi_stat(bdi, BDI_UNSTABLE) +
 		bdi_stat(bdi, BDI_WRITEBACK);
+	if (bdi_cap_account_unstable(bdi))
+		avail_dirty += bdi_stat(bdi, BDI_UNSTABLE);
 
 	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
 }
@@ -510,8 +511,9 @@ static void balance_dirty_pages(struct address_space *mapping,
 					global_page_state(NR_UNSTABLE_NFS);
 		nr_writeback = global_page_state(NR_WRITEBACK);
 
-		bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
-				     bdi_stat(bdi, BDI_UNSTABLE);
+		bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+		if (bdi_cap_account_unstable(bdi))
+			bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
 		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 
 		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -556,11 +558,15 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * deltas.
 		 */
 		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
-			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY);
+			if (bdi_cap_account_unstable(bdi))
+				bdi_nr_reclaimable +=
 					     bdi_stat_sum(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
 		} else if (bdi_nr_reclaimable) {
-			bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+			bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+			if (bdi_cap_account_unstable(bdi))
+				bdi_nr_reclaimable +=
 					     bdi_stat(bdi, BDI_UNSTABLE);
 			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
 		}


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

Now that we have correct COMMIT semantics in writeback_single_inode...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/write.c         |   39 ++++++---------------------------------
 include/linux/nfs_fs.h |    1 -
 2 files changed, 6 insertions(+), 34 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..747b40e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	pgoff_t idx_start, idx_end;
 	unsigned int npages = 0;
 	LIST_HEAD(head);
-	int nocommit = how & FLUSH_NOCOMMIT;
 	long pages, ret;
 
 	/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 				npages = 0;
 		}
 	}
-	how &= ~FLUSH_NOCOMMIT;
 	spin_lock(&inode->i_lock);
 	do {
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
 			continue;
-		if (nocommit)
-			break;
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
@@ -1481,47 +1477,24 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	return ret;
 }
 
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	int ret;
-
-	ret = nfs_writepages(mapping, wbc);
-	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, wbc, how);
-	if (ret < 0)
-		goto out;
-	return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
 {
 	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
 		.range_start = 0,
 		.range_end = LLONG_MAX,
 	};
 
-	return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, 0);
+	return sync_inode(inode, &wbc);
 }
 
 int nfs_wb_nocommit(struct inode *inode)
 {
-	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+	return filemap_write_and_wait(inode->i_mapping);
 }
 
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..c536caf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
 
 #ifdef __KERNEL__
 


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 03/12] NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

The sole purpose of nfs_write_inode is to commit unstable writes, so
move it into fs/nfs/write.c, and make nfs_commit_inode static.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/inode.c         |   12 ------------
 fs/nfs/write.c         |   24 +++++++++++++++++++++++-
 include/linux/nfs_fs.h |    7 -------
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 302aa89..8341709 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid)
 	return ino;
 }
 
-int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	int ret;
-
-	ret = nfs_commit_inode(inode,
-			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
-	if (ret >= 0)
-		return 0;
-	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-	return ret;
-}
-
 void nfs_clear_inode(struct inode *inode)
 {
 	/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7ba56f8..e2e7464 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_release = nfs_commit_release,
 };
 
-int nfs_commit_inode(struct inode *inode, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
 	int res;
@@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how)
 	}
 	return res;
 }
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+	int ret;
+
+	ret = nfs_commit_inode(inode,
+			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+	if (ret >= 0)
+		return 0;
+	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	return ret;
+}
 #else
 static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
 	return 0;
 }
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+	return 0;
+}
 #endif
 
+int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return nfs_commit_unstable_pages(inode, wbc);
+}
+
 long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
 	struct inode *inode = mapping->host;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d09db1b..384ea3e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_write_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_write_data *wdata);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
-	return 0;
-}
 #endif
 
 static inline int


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 06/12] NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
---

 fs/nfs/write.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9feafab..b0b3890 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1420,7 +1420,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
 		    NFS_PAGE_TAG_LOCKED))
 		goto out_mark_dirty;
 
-	if (wbc->nonblocking)
+	if (wbc->nonblocking || wbc->for_background)
 		flags = 0;
 	ret = nfs_commit_inode(inode, flags);
 	if (ret >= 0)


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/write.c         |  118 +++++++++---------------------------------------
 include/linux/nfs_fs.h |    1 
 2 files changed, 22 insertions(+), 97 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 747b40e..830613d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -501,44 +501,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
 }
 #endif
 
-/*
- * Wait for a request to complete.
- *
- * Interruptible by fatal signals only.
- */
-static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_page *req;
-	pgoff_t idx_end, next;
-	unsigned int		res = 0;
-	int			error;
-
-	if (npages == 0)
-		idx_end = ~0;
-	else
-		idx_end = idx_start + npages - 1;
-
-	next = idx_start;
-	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
-		if (req->wb_index > idx_end)
-			break;
-
-		next = req->wb_index + 1;
-		BUG_ON(!NFS_WBACK_BUSY(req));
-
-		kref_get(&req->wb_kref);
-		spin_unlock(&inode->i_lock);
-		error = nfs_wait_on_request(req);
-		nfs_release_request(req);
-		spin_lock(&inode->i_lock);
-		if (error < 0)
-			return error;
-		res++;
-	}
-	return res;
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
@@ -1437,46 +1399,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return nfs_commit_unstable_pages(inode, wbc);
 }
 
-long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	struct inode *inode = mapping->host;
-	pgoff_t idx_start, idx_end;
-	unsigned int npages = 0;
-	LIST_HEAD(head);
-	long pages, ret;
-
-	/* FIXME */
-	if (wbc->range_cyclic)
-		idx_start = 0;
-	else {
-		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (idx_end > idx_start) {
-			pgoff_t l_npages = 1 + idx_end - idx_start;
-			npages = l_npages;
-			if (sizeof(npages) != sizeof(l_npages) &&
-					(pgoff_t)npages != l_npages)
-				npages = 0;
-		}
-	}
-	spin_lock(&inode->i_lock);
-	do {
-		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
-		if (ret != 0)
-			continue;
-		pages = nfs_scan_commit(inode, &head, idx_start, npages);
-		if (pages == 0)
-			break;
-		pages += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&inode->i_lock);
-		ret = nfs_commit_list(inode, &head, how);
-		spin_lock(&inode->i_lock);
-
-	} while (ret >= 0);
-	spin_unlock(&inode->i_lock);
-	return ret;
-}
-
 /*
  * flush the inode to disk.
  */
@@ -1524,45 +1446,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 	return ret;
 }
 
-static int nfs_wb_page_priority(struct inode *inode, struct page *page,
-				int how)
+/*
+ * Write back all requests on one page - we do this before reading it.
+ */
+int nfs_wb_page(struct inode *inode, struct page *page)
 {
 	loff_t range_start = page_offset(page);
 	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
 	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
+		.nr_to_write = 0,
 		.range_start = range_start,
 		.range_end = range_end,
 	};
+	struct nfs_page *req;
+	int need_commit;
 	int ret;
 
-	do {
+	while(PagePrivate(page)) {
 		if (clear_page_dirty_for_io(page)) {
 			ret = nfs_writepage_locked(page, &wbc);
 			if (ret < 0)
 				goto out_error;
-		} else if (!PagePrivate(page))
+		}
+		req = nfs_find_and_lock_request(page);
+		if (!req)
 			break;
-		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
-		if (ret < 0)
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
 			goto out_error;
-	} while (PagePrivate(page));
+		}
+		need_commit = test_bit(PG_CLEAN, &req->wb_flags);
+		nfs_clear_page_tag_locked(req);
+		if (need_commit) {
+			ret = nfs_commit_inode(inode, FLUSH_SYNC);
+			if (ret < 0)
+				goto out_error;
+		}
+	}
 	return 0;
 out_error:
-	__mark_inode_dirty(inode, I_DIRTY_PAGES);
 	return ret;
 }
 
-/*
- * Write back all requests on one page - we do this before reading it.
- */
-int nfs_wb_page(struct inode *inode, struct page* page)
-{
-	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
-}
-
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 		struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c536caf..6ee74c5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -474,7 +474,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
  * Try to write back everything synchronously (but check the
  * return value!)
  */
-extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
 extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 07/12] NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

Since nfs_scan_list() doesn't wait for locked pages, we have a race in
which it is possible to end up with an inode that needs to send a COMMIT,
but which does not have the I_DIRTY_DATASYNC flag set.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/write.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b0b3890..2d78f08 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -573,11 +573,15 @@ static int
 nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
+	int ret;
 
 	if (!nfs_need_commit(nfsi))
 		return 0;
 
-	return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	if (nfs_need_commit(NFS_I(inode)))
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	return ret;
 }
 #else
 static inline int nfs_need_commit(struct nfs_inode *nfsi)


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 12/12] NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15     ` Trond Myklebust
  2010-01-25 22:15     ` Trond Myklebust
                       ` (8 subsequent siblings)
  9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/dir.c           |    2 +-
 fs/nfs/inode.c         |   41 +----------------------------------------
 fs/nfs/symlink.c       |    2 +-
 include/linux/nfs_fs.h |    1 -
 4 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3c7f03b..a1f6b44 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	desc->entry = &my_entry;
 
 	nfs_block_sillyrename(dentry);
-	res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (res < 0)
 		goto out;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a867c4c..30988bc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -759,7 +759,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	return __nfs_revalidate_inode(server, inode);
 }
 
-static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
+static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	
@@ -780,49 +780,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
 	return 0;
 }
 
-static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
-{
-	int ret = 0;
-
-	mutex_lock(&inode->i_mutex);
-	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
-		ret = nfs_sync_mapping(mapping);
-		if (ret == 0)
-			ret = nfs_invalidate_mapping_nolock(inode, mapping);
-	}
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-/**
- * nfs_revalidate_mapping_nolock - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- */
-int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	int ret = 0;
-
-	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
-			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
-		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
-		if (ret < 0)
-			goto out;
-	}
-	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
-		ret = nfs_invalidate_mapping_nolock(inode, mapping);
-out:
-	return ret;
-}
-
 /**
  * nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
- *
- * This version of the function will take the inode->i_mutex and attempt to
- * flush out all dirty data if it needs to invalidate the page cache.
  */
 int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 412738d..2ea9e5c 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	struct page *page;
 	void *err;
 
-	err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping));
+	err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
 	if (err)
 		goto read_failed;
 	page = read_cache_page(&inode->i_data, 0,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6ee74c5..896a5f3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -346,7 +346,6 @@ extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
-extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 11/12] NFS: Clean up nfs_sync_mapping
  2010-01-25 22:15 [PATCH 00/12] Re: [PATCH] improve the performance of large sequential write NFS workloads Trond Myklebust
  2010-01-25 22:15 ` [PATCH 05/12] VM/NFS: The VM must tell the filesystem when to free reclaimable pages Trond Myklebust
       [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
  2 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

Remove the redundant call to filemap_write_and_wait().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/inode.c |   14 +++++---------
 1 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8341709..a867c4c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -114,16 +114,12 @@ void nfs_clear_inode(struct inode *inode)
  */
 int nfs_sync_mapping(struct address_space *mapping)
 {
-	int ret;
+	int ret = 0;
 
-	if (mapping->nrpages == 0)
-		return 0;
-	unmap_mapping_range(mapping, 0, 0, 0);
-	ret = filemap_write_and_wait(mapping);
-	if (ret != 0)
-		goto out;
-	ret = nfs_wb_all(mapping->host);
-out:
+	if (mapping->nrpages != 0) {
+		unmap_mapping_range(mapping, 0, 0, 0);
+		ret = nfs_wb_all(mapping->host);
+	}
 	return ret;
 }
 


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH 12/12] NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping
@ 2010-01-25 22:15     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
  To: linux-nfs
  Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
	Christoph Hellwig, Al Viro

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/dir.c           |    2 +-
 fs/nfs/inode.c         |   41 +----------------------------------------
 fs/nfs/symlink.c       |    2 +-
 include/linux/nfs_fs.h |    1 -
 4 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3c7f03b..a1f6b44 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	desc->entry = &my_entry;
 
 	nfs_block_sillyrename(dentry);
-	res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (res < 0)
 		goto out;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a867c4c..30988bc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -759,7 +759,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	return __nfs_revalidate_inode(server, inode);
 }
 
-static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
+static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	
@@ -780,49 +780,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
 	return 0;
 }
 
-static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
-{
-	int ret = 0;
-
-	mutex_lock(&inode->i_mutex);
-	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
-		ret = nfs_sync_mapping(mapping);
-		if (ret == 0)
-			ret = nfs_invalidate_mapping_nolock(inode, mapping);
-	}
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-/**
- * nfs_revalidate_mapping_nolock - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- */
-int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	int ret = 0;
-
-	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
-			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
-		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
-		if (ret < 0)
-			goto out;
-	}
-	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
-		ret = nfs_invalidate_mapping_nolock(inode, mapping);
-out:
-	return ret;
-}
-
 /**
  * nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
- *
- * This version of the function will take the inode->i_mutex and attempt to
- * flush out all dirty data if it needs to invalidate the page cache.
  */
 int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 412738d..2ea9e5c 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	struct page *page;
 	void *err;
 
-	err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping));
+	err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
 	if (err)
 		goto read_failed;
 	page = read_cache_page(&inode->i_data, 0,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6ee74c5..896a5f3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -346,7 +346,6 @@ extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
-extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
  2010-01-25 22:15     ` Trond Myklebust
@ 2010-01-26 11:21         ` Christoph Hellwig
  -1 siblings, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-01-26 11:21 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
>  int nfs_wb_nocommit(struct inode *inode)
>  {
> -	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> +	return filemap_write_and_wait(inode->i_mapping);

Any point in keeping this as a wrapper for a single well-documented
caller?  Also taking i_mutex around it seems a bit questionable these
days given that filemap_write_and_wait avoids lifelocks with writing
applications okay and we use it without i_mutex all over the place.

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-26 11:21         ` Christoph Hellwig
  0 siblings, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-01-26 11:21 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro

On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
>  int nfs_wb_nocommit(struct inode *inode)
>  {
> -	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> +	return filemap_write_and_wait(inode->i_mapping);

Any point in keeping this as a wrapper for a single well-documented
caller?  Also taking i_mutex around it seems a bit questionable these
days given that filemap_write_and_wait avoids lifelocks with writing
applications okay and we use it without i_mutex all over the place.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
  2010-01-26 11:21         ` Christoph Hellwig
@ 2010-01-26 14:02             ` Trond Myklebust
  -1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 14:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote: 
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> >  int nfs_wb_nocommit(struct inode *inode)
> >  {
> > -	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > +	return filemap_write_and_wait(inode->i_mapping);
> 
> Any point in keeping this as a wrapper for a single well-documented
> caller?  Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
> 

Agreed, but just out of curiosity. Is there any reason why we shouldn't
use filemap_flush() + filemap_fdatawait() here instead? We're not really
interested in doing a full data integrity flush, but just want to make
sure that pages which were dirtied before the stat() syscall are flushed
to disk so that the server updates the c/mtime for us.

Cheers
  Trond
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-26 14:02             ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 14:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro

On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote: 
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> >  int nfs_wb_nocommit(struct inode *inode)
> >  {
> > -	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > +	return filemap_write_and_wait(inode->i_mapping);
> 
> Any point in keeping this as a wrapper for a single well-documented
> caller?  Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
> 

Agreed, but just out of curiosity. Is there any reason why we shouldn't
use filemap_flush() + filemap_fdatawait() here instead? We're not really
interested in doing a full data integrity flush, but just want to make
sure that pages which were dirtied before the stat() syscall are flushed
to disk so that the server updates the c/mtime for us.

Cheers
  Trond

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
  2010-01-26 11:21         ` Christoph Hellwig
@ 2010-01-26 23:17             ` Trond Myklebust
  -1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 23:17 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote: 
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> >  int nfs_wb_nocommit(struct inode *inode)
> >  {
> > -	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > +	return filemap_write_and_wait(inode->i_mapping);
> 
> Any point in keeping this as a wrapper for a single well-documented
> caller?  Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
> 

I'm replacing the former patch with the following updated version.

Cheers
  Trond

--------------------------------------------------------------------------------------------- 
NFS: Replace __nfs_write_mapping with sync_inode()

From: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>

Now that we have correct COMMIT semantics in writeback_single_inode, we can
reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a
call to filemap_write_and_wait(), which doesn't need to hold the
inode->i_mutex.

With that done, we can eliminate nfs_write_mapping() altogether.

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/inode.c         |   15 +++++----------
 fs/nfs/write.c         |   42 +++++-------------------------------------
 include/linux/nfs_fs.h |    2 --
 3 files changed, 10 insertions(+), 49 deletions(-)


diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8341709..733cb27 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
-	/*
-	 * Flush out writes to the server in order to update c/mtime.
-	 *
-	 * Hold the i_mutex to suspend application writes temporarily;
-	 * this prevents long-running writing applications from blocking
-	 * nfs_wb_nocommit.
-	 */
+	/* Flush out writes to the server in order to update c/mtime.  */
 	if (S_ISREG(inode->i_mode)) {
-		mutex_lock(&inode->i_mutex);
-		nfs_wb_nocommit(inode);
-		mutex_unlock(&inode->i_mutex);
+		err = filemap_write_and_wait(inode->i_mapping);
+		if (err)
+			goto out;
 	}
 
 	/*
@@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 		generic_fillattr(inode, stat);
 		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	}
+out:
 	return err;
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..c28cf57 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	pgoff_t idx_start, idx_end;
 	unsigned int npages = 0;
 	LIST_HEAD(head);
-	int nocommit = how & FLUSH_NOCOMMIT;
 	long pages, ret;
 
 	/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 				npages = 0;
 		}
 	}
-	how &= ~FLUSH_NOCOMMIT;
 	spin_lock(&inode->i_lock);
 	do {
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
 			continue;
-		if (nocommit)
-			break;
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
@@ -1481,47 +1477,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	return ret;
 }
 
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	int ret;
-
-	ret = nfs_writepages(mapping, wbc);
-	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, wbc, how);
-	if (ret < 0)
-		goto out;
-	return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
 {
 	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
 		.range_start = 0,
 		.range_end = LLONG_MAX,
 	};
 
-	return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, 0);
-}
-
-int nfs_wb_nocommit(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+	return sync_inode(inode, &wbc);
 }
 
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..3383622 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
 
 #ifdef __KERNEL__
 
@@ -477,7 +476,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
  */
 extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
 extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-26 23:17             ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 23:17 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro

On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote: 
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> >  int nfs_wb_nocommit(struct inode *inode)
> >  {
> > -	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > +	return filemap_write_and_wait(inode->i_mapping);
> 
> Any point in keeping this as a wrapper for a single well-documented
> caller?  Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
> 

I'm replacing the former patch with the following updated version.

Cheers
  Trond

--------------------------------------------------------------------------------------------- 
NFS: Replace __nfs_write_mapping with sync_inode()

From: Trond Myklebust <Trond.Myklebust@netapp.com>

Now that we have correct COMMIT semantics in writeback_single_inode, we can
reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a
call to filemap_write_and_wait(), which doesn't need to hold the
inode->i_mutex.

With that done, we can eliminate nfs_write_mapping() altogether.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/inode.c         |   15 +++++----------
 fs/nfs/write.c         |   42 +++++-------------------------------------
 include/linux/nfs_fs.h |    2 --
 3 files changed, 10 insertions(+), 49 deletions(-)


diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8341709..733cb27 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
-	/*
-	 * Flush out writes to the server in order to update c/mtime.
-	 *
-	 * Hold the i_mutex to suspend application writes temporarily;
-	 * this prevents long-running writing applications from blocking
-	 * nfs_wb_nocommit.
-	 */
+	/* Flush out writes to the server in order to update c/mtime.  */
 	if (S_ISREG(inode->i_mode)) {
-		mutex_lock(&inode->i_mutex);
-		nfs_wb_nocommit(inode);
-		mutex_unlock(&inode->i_mutex);
+		err = filemap_write_and_wait(inode->i_mapping);
+		if (err)
+			goto out;
 	}
 
 	/*
@@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 		generic_fillattr(inode, stat);
 		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	}
+out:
 	return err;
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..c28cf57 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	pgoff_t idx_start, idx_end;
 	unsigned int npages = 0;
 	LIST_HEAD(head);
-	int nocommit = how & FLUSH_NOCOMMIT;
 	long pages, ret;
 
 	/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 				npages = 0;
 		}
 	}
-	how &= ~FLUSH_NOCOMMIT;
 	spin_lock(&inode->i_lock);
 	do {
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
 			continue;
-		if (nocommit)
-			break;
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
@@ -1481,47 +1477,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	return ret;
 }
 
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	int ret;
-
-	ret = nfs_writepages(mapping, wbc);
-	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, wbc, how);
-	if (ret < 0)
-		goto out;
-	return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
 {
 	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
 		.range_start = 0,
 		.range_end = LLONG_MAX,
 	};
 
-	return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, 0);
-}
-
-int nfs_wb_nocommit(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+	return sync_inode(inode, &wbc);
 }
 
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..3383622 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
 
 #ifdef __KERNEL__
 
@@ -477,7 +476,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
  */
 extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
 extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
       [not found]     ` <20100125221545.16750.19154.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-10 18:51         ` J. R. Okajima
  0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-10 18:51 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro


Trond Myklebust:
> -static int nfs_wb_page_priority(struct inode *inode, struct page *page,
> -				int how)
> +/*
> + * Write back all requests on one page - we do this before reading it.
> + */
> +int nfs_wb_page(struct inode *inode, struct page *page)
>  {
	:::
> -	do {
> +	while(PagePrivate(page)) {
>  		if (clear_page_dirty_for_io(page)) {
>  			ret = nfs_writepage_locked(page, &wbc);
>  			if (ret < 0)
>  				goto out_error;
> -		} else if (!PagePrivate(page))
> +		}
> +		req = nfs_find_and_lock_request(page);
> +		if (!req)
>  			break;
> -		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
> -		if (ret < 0)
> +		if (IS_ERR(req)) {
> +			ret = PTR_ERR(req);
>  			goto out_error;
	:::

Hello Trond,

I am unsure whether this nfs_find_and_lock_request() call is correct or
not, but it brings me a problem.
This call trace in "kswapd blocked for more than brabra" message shows
that generic_delete_inode() blocked.
Is this put_nfs_open_context() -- iput() call in shrink_page_list()
context intended?
If not, I'd suggest a patch like this.


INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0       D 0000000000000001     0   305      2 0x00000000
 ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
 ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
 ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
 [<ffffffff8146155d>] io_schedule+0x4d/0x70
 [<ffffffff810d2be5>] sync_page+0x65/0xa0
 [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
 [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
 [<ffffffff810d2b64>] __lock_page+0x64/0x70
 [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
 [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
 [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
 [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
 [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
 [<ffffffff8112bb88>] iput+0x78/0x80
 [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
 [<ffffffff811285f4>] dentry_iput+0x84/0x110
 [<ffffffff811286ae>] d_kill+0x2e/0x60
 [<ffffffff8112912a>] dput+0x7a/0x170
 [<ffffffff8111e925>] path_put+0x15/0x40
 [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
 [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
 [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
 [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
 [<ffffffff81234b7e>] kref_put+0x8e/0xe0
 [<ffffffff811cb594>] nfs_release_request+0x14/0x20
 [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
 [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
 [<ffffffff811c0770>] nfs_release_page+0x70/0x90
 [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
 [<ffffffff810e1178>] shrink_page_list+0x638/0x860
 [<ffffffff810e19de>] shrink_zone+0x63e/0xc40
 [<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
 [<ffffffff8107641e>] ? up_read+0x1e/0x40
 [<ffffffff810e26a9>] kswapd+0x6c9/0xa20
 [<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
 [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
 [<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
 [<ffffffff810706d6>] kthread+0x96/0xb0
 [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81464f14>] ? restore_args+0x0/0x30
 [<ffffffff81070640>] ? kthread+0x0/0xb0
 [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
no locks held by kswapd0/305.


diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ae8d022..ffa5463 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 {
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
-	if (gfp & __GFP_WAIT)
+	if (gfp & __GFP_WAIT) {
+		struct inode *inode;
+
+		inode = igrab(page->mapping->host);
 		nfs_wb_page(page->mapping->host, page);
+		iput(inode);
+	}
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
 		return 0;


J. R. Okajima
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-10 18:51         ` J. R. Okajima
  0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-10 18:51 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro


Trond Myklebust:
> -static int nfs_wb_page_priority(struct inode *inode, struct page *page,
> -				int how)
> +/*
> + * Write back all requests on one page - we do this before reading it.
> + */
> +int nfs_wb_page(struct inode *inode, struct page *page)
>  {
	:::
> -	do {
> +	while(PagePrivate(page)) {
>  		if (clear_page_dirty_for_io(page)) {
>  			ret = nfs_writepage_locked(page, &wbc);
>  			if (ret < 0)
>  				goto out_error;
> -		} else if (!PagePrivate(page))
> +		}
> +		req = nfs_find_and_lock_request(page);
> +		if (!req)
>  			break;
> -		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
> -		if (ret < 0)
> +		if (IS_ERR(req)) {
> +			ret = PTR_ERR(req);
>  			goto out_error;
	:::

Hello Trond,

I am unsure whether this nfs_find_and_lock_request() call is correct or
not, but it brings me a problem.
This call trace in "kswapd blocked for more than brabra" message shows
that generic_delete_inode() blocked.
Is this put_nfs_open_context() -- iput() call in shrink_page_list()
context intended?
If not, I'd suggest a patch like this.


INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0       D 0000000000000001     0   305      2 0x00000000
 ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
 ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
 ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
 [<ffffffff8146155d>] io_schedule+0x4d/0x70
 [<ffffffff810d2be5>] sync_page+0x65/0xa0
 [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
 [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
 [<ffffffff810d2b64>] __lock_page+0x64/0x70
 [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
 [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
 [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
 [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
 [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
 [<ffffffff8112bb88>] iput+0x78/0x80
 [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
 [<ffffffff811285f4>] dentry_iput+0x84/0x110
 [<ffffffff811286ae>] d_kill+0x2e/0x60
 [<ffffffff8112912a>] dput+0x7a/0x170
 [<ffffffff8111e925>] path_put+0x15/0x40
 [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
 [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
 [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
 [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
 [<ffffffff81234b7e>] kref_put+0x8e/0xe0
 [<ffffffff811cb594>] nfs_release_request+0x14/0x20
 [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
 [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
 [<ffffffff811c0770>] nfs_release_page+0x70/0x90
 [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
 [<ffffffff810e1178>] shrink_page_list+0x638/0x860
 [<ffffffff810e19de>] shrink_zone+0x63e/0xc40
 [<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
 [<ffffffff8107641e>] ? up_read+0x1e/0x40
 [<ffffffff810e26a9>] kswapd+0x6c9/0xa20
 [<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
 [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
 [<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
 [<ffffffff810706d6>] kthread+0x96/0xb0
 [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81464f14>] ? restore_args+0x0/0x30
 [<ffffffff81070640>] ? kthread+0x0/0xb0
 [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
no locks held by kswapd0/305.


diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ae8d022..ffa5463 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 {
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
-	if (gfp & __GFP_WAIT)
+	if (gfp & __GFP_WAIT) {
+		struct inode *inode;
+
+		inode = igrab(page->mapping->host);
 		nfs_wb_page(page->mapping->host, page);
+		iput(inode);
+	}
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
 		return 0;


J. R. Okajima

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
  2010-03-10 18:51         ` J. R. Okajima
@ 2010-03-10 19:31           ` Trond Myklebust
  -1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 19:31 UTC (permalink / raw)
  To: J. R. Okajima
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Thu, 2010-03-11 at 03:51 +0900, J. R. Okajima wrote: 
> 
> INFO: task kswapd0:305 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> kswapd0       D 0000000000000001     0   305      2 0x00000000
>  ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
>  ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
>  ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
> Call Trace:
>  [<ffffffff8146155d>] io_schedule+0x4d/0x70
>  [<ffffffff810d2be5>] sync_page+0x65/0xa0
>  [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
>  [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
>  [<ffffffff810d2b64>] __lock_page+0x64/0x70
>  [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
>  [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
>  [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
>  [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
>  [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
>  [<ffffffff8112bb88>] iput+0x78/0x80
>  [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
>  [<ffffffff811285f4>] dentry_iput+0x84/0x110
>  [<ffffffff811286ae>] d_kill+0x2e/0x60
>  [<ffffffff8112912a>] dput+0x7a/0x170
>  [<ffffffff8111e925>] path_put+0x15/0x40
>  [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
>  [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
>  [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
>  [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
>  [<ffffffff81234b7e>] kref_put+0x8e/0xe0
>  [<ffffffff811cb594>] nfs_release_request+0x14/0x20
>  [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
>  [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
>  [<ffffffff811c0770>] nfs_release_page+0x70/0x90
>  [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
>  [<ffffffff810e1178>] shrink_page_list+0x638/0x860
>  [<ffffffff810e19de>] shrink_zone+0x63e/0xc40
>  [<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
>  [<ffffffff8107641e>] ? up_read+0x1e/0x40
>  [<ffffffff810e26a9>] kswapd+0x6c9/0xa20
>  [<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
>  [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
>  [<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
>  [<ffffffff810706d6>] kthread+0x96/0xb0
>  [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
>  [<ffffffff81464f14>] ? restore_args+0x0/0x30
>  [<ffffffff81070640>] ? kthread+0x0/0xb0
>  [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
> no locks held by kswapd0/305.
> 
> 
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index ae8d022..ffa5463 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
>  {
>  	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
>  
> -	if (gfp & __GFP_WAIT)
> +	if (gfp & __GFP_WAIT) {
> +		struct inode *inode;
> +
> +		inode = igrab(page->mapping->host);
>  		nfs_wb_page(page->mapping->host, page);
> +		iput(inode);
> +	}
>  	/* If PagePrivate() is set, then the page is not freeable */
>  	if (PagePrivate(page))
>  		return 0;
> 
> 
> J. R. Okajima

>From your trace it looks as if the problem is that the nfs_wb_page() is
triggering a dentry release, which deadlocks with in
truncate_inode_pages() because the _caller_ of nfs_release_page() holds
a page lock.

As far as I can see, your iput() call above can deadlock in exactly the
same way.

Note that shrink_page_list() is the only function that does this sort of
thing without holding a reference to the inode.

Cheers
  Trond
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-10 19:31           ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 19:31 UTC (permalink / raw)
  To: J. R. Okajima
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro

On Thu, 2010-03-11 at 03:51 +0900, J. R. Okajima wrote: 
> 
> INFO: task kswapd0:305 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> kswapd0       D 0000000000000001     0   305      2 0x00000000
>  ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
>  ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
>  ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
> Call Trace:
>  [<ffffffff8146155d>] io_schedule+0x4d/0x70
>  [<ffffffff810d2be5>] sync_page+0x65/0xa0
>  [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
>  [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
>  [<ffffffff810d2b64>] __lock_page+0x64/0x70
>  [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
>  [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
>  [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
>  [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
>  [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
>  [<ffffffff8112bb88>] iput+0x78/0x80
>  [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
>  [<ffffffff811285f4>] dentry_iput+0x84/0x110
>  [<ffffffff811286ae>] d_kill+0x2e/0x60
>  [<ffffffff8112912a>] dput+0x7a/0x170
>  [<ffffffff8111e925>] path_put+0x15/0x40
>  [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
>  [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
>  [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
>  [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
>  [<ffffffff81234b7e>] kref_put+0x8e/0xe0
>  [<ffffffff811cb594>] nfs_release_request+0x14/0x20
>  [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
>  [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
>  [<ffffffff811c0770>] nfs_release_page+0x70/0x90
>  [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
>  [<ffffffff810e1178>] shrink_page_list+0x638/0x860
>  [<ffffffff810e19de>] shrink_zone+0x63e/0xc40
>  [<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
>  [<ffffffff8107641e>] ? up_read+0x1e/0x40
>  [<ffffffff810e26a9>] kswapd+0x6c9/0xa20
>  [<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
>  [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
>  [<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
>  [<ffffffff810706d6>] kthread+0x96/0xb0
>  [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
>  [<ffffffff81464f14>] ? restore_args+0x0/0x30
>  [<ffffffff81070640>] ? kthread+0x0/0xb0
>  [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
> no locks held by kswapd0/305.
> 
> 
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index ae8d022..ffa5463 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
>  {
>  	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
>  
> -	if (gfp & __GFP_WAIT)
> +	if (gfp & __GFP_WAIT) {
> +		struct inode *inode;
> +
> +		inode = igrab(page->mapping->host);
>  		nfs_wb_page(page->mapping->host, page);
> +		iput(inode);
> +	}
>  	/* If PagePrivate() is set, then the page is not freeable */
>  	if (PagePrivate(page))
>  		return 0;
> 
> 
> J. R. Okajima

>From your trace it looks as if the problem is that the nfs_wb_page() is
triggering a dentry release, which deadlocks with in
truncate_inode_pages() because the _caller_ of nfs_release_page() holds
a page lock.

As far as I can see, your iput() call above can deadlock in exactly the
same way.

Note that shrink_page_list() is the only function that does this sort of
thing without holding a reference to the inode.

Cheers
  Trond

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
       [not found]           ` <1268249482.3096.76.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-10 20:18               ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 20:18 UTC (permalink / raw)
  To: J. R. Okajima
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Wed, 2010-03-10 at 14:31 -0500, Trond Myklebust wrote: 
> >From your trace it looks as if the problem is that the nfs_wb_page() is
> triggering a dentry release, which deadlocks with in
> truncate_inode_pages() because the _caller_ of nfs_release_page() holds
> a page lock.
> 
> As far as I can see, your iput() call above can deadlock in exactly the
> same way.
> 
> Note that shrink_page_list() is the only function that does this sort of
> thing without holding a reference to the inode.

OK. Does the following patch fix the deadlock for you?

Cheers
  Trond
----------------------------------------------------------------------------------------------------------- 
NFS: Avoid a deadlock in nfs_release_page

From: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>

J.R. Okajima reports the following deadlock:

INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0       D 0000000000000001     0   305      2 0x00000000
 ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
 ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
 ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
 [<ffffffff8146155d>] io_schedule+0x4d/0x70
 [<ffffffff810d2be5>] sync_page+0x65/0xa0
 [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
 [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
 [<ffffffff810d2b64>] __lock_page+0x64/0x70
 [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
 [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
 [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
 [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
 [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
 [<ffffffff8112bb88>] iput+0x78/0x80
 [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
 [<ffffffff811285f4>] dentry_iput+0x84/0x110
 [<ffffffff811286ae>] d_kill+0x2e/0x60
 [<ffffffff8112912a>] dput+0x7a/0x170
 [<ffffffff8111e925>] path_put+0x15/0x40
 [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
 [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
 [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
 [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
 [<ffffffff81234b7e>] kref_put+0x8e/0xe0
 [<ffffffff811cb594>] nfs_release_request+0x14/0x20
 [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
 [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
 [<ffffffff811c0770>] nfs_release_page+0x70/0x90
 [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
 [<ffffffff810e1178>] shrink_page_list+0x638/0x860
 [<ffffffff810e19de>] shrink_zone+0x63e/0xc40

We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---

 fs/nfs/pagelist.c |    9 +++++++--
 1 files changed, 7 insertions(+), 2 deletions(-)


diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..81fb4a5 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -148,10 +148,16 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
 void nfs_clear_request(struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
+	struct nfs_open_context *ctx = req->wb_context;
+
 	if (page != NULL) {
 		page_cache_release(page);
 		req->wb_page = NULL;
 	}
+	if (ctx != NULL) {
+		put_nfs_open_context(ctx);
+		req->wb_context = NULL;
+	}
 }
 

@@ -165,9 +171,8 @@ static void nfs_free_request(struct kref *kref)
 {
 	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
 
-	/* Release struct file or cached credential */
+	/* Release struct file and open context */
 	nfs_clear_request(req);
-	put_nfs_open_context(req->wb_context);
 	nfs_page_free(req);
 }
 



--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-10 20:18               ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 20:18 UTC (permalink / raw)
  To: J. R. Okajima
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro

On Wed, 2010-03-10 at 14:31 -0500, Trond Myklebust wrote: 
> >From your trace it looks as if the problem is that the nfs_wb_page() is
> triggering a dentry release, which deadlocks with in
> truncate_inode_pages() because the _caller_ of nfs_release_page() holds
> a page lock.
> 
> As far as I can see, your iput() call above can deadlock in exactly the
> same way.
> 
> Note that shrink_page_list() is the only function that does this sort of
> thing without holding a reference to the inode.

OK. Does the following patch fix the deadlock for you?

Cheers
  Trond
----------------------------------------------------------------------------------------------------------- 
NFS: Avoid a deadlock in nfs_release_page

From: Trond Myklebust <Trond.Myklebust@netapp.com>

J.R. Okajima reports the following deadlock:

INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0       D 0000000000000001     0   305      2 0x00000000
 ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
 ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
 ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
 [<ffffffff8146155d>] io_schedule+0x4d/0x70
 [<ffffffff810d2be5>] sync_page+0x65/0xa0
 [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
 [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
 [<ffffffff810d2b64>] __lock_page+0x64/0x70
 [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
 [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
 [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
 [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
 [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
 [<ffffffff8112bb88>] iput+0x78/0x80
 [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
 [<ffffffff811285f4>] dentry_iput+0x84/0x110
 [<ffffffff811286ae>] d_kill+0x2e/0x60
 [<ffffffff8112912a>] dput+0x7a/0x170
 [<ffffffff8111e925>] path_put+0x15/0x40
 [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
 [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
 [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
 [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
 [<ffffffff81234b7e>] kref_put+0x8e/0xe0
 [<ffffffff811cb594>] nfs_release_request+0x14/0x20
 [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
 [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
 [<ffffffff811c0770>] nfs_release_page+0x70/0x90
 [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
 [<ffffffff810e1178>] shrink_page_list+0x638/0x860
 [<ffffffff810e19de>] shrink_zone+0x63e/0xc40

We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---

 fs/nfs/pagelist.c |    9 +++++++--
 1 files changed, 7 insertions(+), 2 deletions(-)


diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..81fb4a5 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -148,10 +148,16 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
 void nfs_clear_request(struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
+	struct nfs_open_context *ctx = req->wb_context;
+
 	if (page != NULL) {
 		page_cache_release(page);
 		req->wb_page = NULL;
 	}
+	if (ctx != NULL) {
+		put_nfs_open_context(ctx);
+		req->wb_context = NULL;
+	}
 }
 

@@ -165,9 +171,8 @@ static void nfs_free_request(struct kref *kref)
 {
 	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
 
-	/* Release struct file or cached credential */
+	/* Release struct file and open context */
 	nfs_clear_request(req);
-	put_nfs_open_context(req->wb_context);
 	nfs_page_free(req);
 }
 




^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
       [not found]               ` <1268252300.3096.81.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-11  4:45                   ` J. R. Okajima
  0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-11  4:45 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro


Trond Myklebust:
> OK. Does the following patch fix the deadlock for you?

Thanx for the patch, but it didn't work.

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
PGD 1e641067 PUD 135a3067 PMD 0 
Oops: 0000 [#1] PREEMPT SMP 
last sysfs file: /sys/fs/aufs/si_fa7907d8bbb32280/br1
CPU 0 
Modules linked in: aufs configs nfsd exportfs nls_iso8859_1 nls_cp437 [last unloaded: aufs]

Pid: 308, comm: nfsiod Not tainted 2.6.33aufsD #730 IPM41/Pegatron
RIP: 0010:[<ffffffff811cb860>]  [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
RSP: 0018:ffff88001f26bd10  EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff86b47850
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88001b87e7c0
RBP: ffff88001f26bd30 R08: fdb3fbf8f8168244 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffff88001b87e7c0
R13: ffff88001f277110 R14: 0000000000000000 R15: ffff88001f2772f8
FS:  0000000000000000(0000) GS:ffff88000bc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000010 CR3: 000000001de5a000 CR4: 00000000000406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process nfsiod (pid: 308, threadinfo ffff88001f26a000, task ffff88001fe92040)
Stack:
 ffff88001b87e7c0 0000000000000000 ffff88001b87e7c0 ffff88001f277110
<0> ffff88001f26bd70 ffffffff811d1464 ffff88001f2772ec 0000000000000000
<0> ffff88001f277118 ffff88001f277110 ffffffff8162b980 ffffffff81435e20
Call Trace:
 [<ffffffff811d1464>] nfs_commit_release+0x74/0x210
 [<ffffffff81435e20>] ? rpc_async_release+0x0/0x20
 [<ffffffff81435c82>] rpc_release_calldata+0x12/0x20
 [<ffffffff81435cea>] rpc_free_task+0x3a/0xa0
 [<ffffffff81435e30>] rpc_async_release+0x10/0x20
 [<ffffffff8106bd84>] worker_thread+0x2d4/0x420
 [<ffffffff8106bd31>] ? worker_thread+0x281/0x420
 [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
 [<ffffffff8106bab0>] ? worker_thread+0x0/0x420
 [<ffffffff810706d6>] kthread+0x96/0xb0
 [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81464f14>] ? restore_args+0x0/0x30
 [<ffffffff81070640>] ? kthread+0x0/0xb0
 [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
Code: 29 00 0f 0b eb fe 0f 1f 44 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 48 89 5d e8 4c 89 6d f8 49 89 fc 48 8b 47 18 48 83 7f 10 00 <48> 8b 40 10 4c 8b 68 40 74 46 49 8d 9d b0 00 00 00 48 89 df e8 
RIP  [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
 RSP <ffff88001f26bd10>
CR2: 0000000000000010
---[ end trace 1265485acdc49b9d ]---


J. R. Okajima
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-11  4:45                   ` J. R. Okajima
  0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-11  4:45 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro


Trond Myklebust:
> OK. Does the following patch fix the deadlock for you?

Thanx for the patch, but it didn't work.

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
PGD 1e641067 PUD 135a3067 PMD 0 
Oops: 0000 [#1] PREEMPT SMP 
last sysfs file: /sys/fs/aufs/si_fa7907d8bbb32280/br1
CPU 0 
Modules linked in: aufs configs nfsd exportfs nls_iso8859_1 nls_cp437 [last unloaded: aufs]

Pid: 308, comm: nfsiod Not tainted 2.6.33aufsD #730 IPM41/Pegatron
RIP: 0010:[<ffffffff811cb860>]  [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
RSP: 0018:ffff88001f26bd10  EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff86b47850
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88001b87e7c0
RBP: ffff88001f26bd30 R08: fdb3fbf8f8168244 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffff88001b87e7c0
R13: ffff88001f277110 R14: 0000000000000000 R15: ffff88001f2772f8
FS:  0000000000000000(0000) GS:ffff88000bc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000010 CR3: 000000001de5a000 CR4: 00000000000406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process nfsiod (pid: 308, threadinfo ffff88001f26a000, task ffff88001fe92040)
Stack:
 ffff88001b87e7c0 0000000000000000 ffff88001b87e7c0 ffff88001f277110
<0> ffff88001f26bd70 ffffffff811d1464 ffff88001f2772ec 0000000000000000
<0> ffff88001f277118 ffff88001f277110 ffffffff8162b980 ffffffff81435e20
Call Trace:
 [<ffffffff811d1464>] nfs_commit_release+0x74/0x210
 [<ffffffff81435e20>] ? rpc_async_release+0x0/0x20
 [<ffffffff81435c82>] rpc_release_calldata+0x12/0x20
 [<ffffffff81435cea>] rpc_free_task+0x3a/0xa0
 [<ffffffff81435e30>] rpc_async_release+0x10/0x20
 [<ffffffff8106bd84>] worker_thread+0x2d4/0x420
 [<ffffffff8106bd31>] ? worker_thread+0x281/0x420
 [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
 [<ffffffff8106bab0>] ? worker_thread+0x0/0x420
 [<ffffffff810706d6>] kthread+0x96/0xb0
 [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81464f14>] ? restore_args+0x0/0x30
 [<ffffffff81070640>] ? kthread+0x0/0xb0
 [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
Code: 29 00 0f 0b eb fe 0f 1f 44 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 48 89 5d e8 4c 89 6d f8 49 89 fc 48 8b 47 18 48 83 7f 10 00 <48> 8b 40 10 4c 8b 68 40 74 46 49 8d 9d b0 00 00 00 48 89 df e8 
RIP  [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
 RSP <ffff88001f26bd10>
CR2: 0000000000000010
---[ end trace 1265485acdc49b9d ]---


J. R. Okajima

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
  2010-03-11  4:45                   ` J. R. Okajima
@ 2010-03-11 14:26                     ` Trond Myklebust
  -1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-11 14:26 UTC (permalink / raw)
  To: J. R. Okajima
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Thu, 2010-03-11 at 13:45 +0900, J. R. Okajima wrote: 
> Trond Myklebust:
> > OK. Does the following patch fix the deadlock for you?
> 
> Thanx for the patch, but it didn't work.
> 
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
> IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80

Oops... Yes, that needs to be fixed, and we should probably make
nfs_set_page_tag_locked() safe too.

How about the following? I can't seem to find any further dereferences
of req->wb_context after the nfs_clear_request().

Cheers
  Trond
------------------------------------------------------------------------------------------------------- 
NFS: Avoid a deadlock in nfs_release_page

From: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>

J.R. Okajima reports the following deadlock:

INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0       D 0000000000000001     0   305      2 0x00000000
 ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
 ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
 ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
 [<ffffffff8146155d>] io_schedule+0x4d/0x70
 [<ffffffff810d2be5>] sync_page+0x65/0xa0
 [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
 [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
 [<ffffffff810d2b64>] __lock_page+0x64/0x70
 [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
 [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
 [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
 [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
 [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
 [<ffffffff8112bb88>] iput+0x78/0x80
 [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
 [<ffffffff811285f4>] dentry_iput+0x84/0x110
 [<ffffffff811286ae>] d_kill+0x2e/0x60
 [<ffffffff8112912a>] dput+0x7a/0x170
 [<ffffffff8111e925>] path_put+0x15/0x40
 [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
 [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
 [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
 [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
 [<ffffffff81234b7e>] kref_put+0x8e/0xe0
 [<ffffffff811cb594>] nfs_release_request+0x14/0x20
 [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
 [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
 [<ffffffff811c0770>] nfs_release_page+0x70/0x90
 [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
 [<ffffffff810e1178>] shrink_page_list+0x638/0x860
 [<ffffffff810e19de>] shrink_zone+0x63e/0xc40

We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).

Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Cc: stable-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
---

 fs/nfs/pagelist.c |   23 +++++++++++++----------
 1 files changed, 13 insertions(+), 10 deletions(-)


diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..29d9d36 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
  */
 int nfs_set_page_tag_locked(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
-
 	if (!nfs_lock_request_dontget(req))
 		return 0;
 	if (req->wb_page != NULL)
-		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 1;
 }
 
@@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
  */
 void nfs_clear_page_tag_locked(struct nfs_page *req)
 {
-	struct inode *inode = req->wb_context->path.dentry->d_inode;
-	struct nfs_inode *nfsi = NFS_I(inode);
-
 	if (req->wb_page != NULL) {
+		struct inode *inode = req->wb_context->path.dentry->d_inode;
+		struct nfs_inode *nfsi = NFS_I(inode);
+
 		spin_lock(&inode->i_lock);
 		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 		nfs_unlock_request(req);
@@ -142,16 +140,22 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
  * nfs_clear_request - Free up all resources allocated to the request
  * @req:
  *
- * Release page resources associated with a write request after it
- * has completed.
+ * Release page and open context resources associated with a read/write
+ * request after it has completed.
  */
 void nfs_clear_request(struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
+	struct nfs_open_context *ctx = req->wb_context;
+
 	if (page != NULL) {
 		page_cache_release(page);
 		req->wb_page = NULL;
 	}
+	if (ctx != NULL) {
+		put_nfs_open_context(ctx);
+		req->wb_context = NULL;
+	}
 }
 

@@ -165,9 +169,8 @@ static void nfs_free_request(struct kref *kref)
 {
 	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
 
-	/* Release struct file or cached credential */
+	/* Release struct file and open context */
 	nfs_clear_request(req);
-	put_nfs_open_context(req->wb_context);
 	nfs_page_free(req);
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-11 14:26                     ` Trond Myklebust
  0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-11 14:26 UTC (permalink / raw)
  To: J. R. Okajima
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro

On Thu, 2010-03-11 at 13:45 +0900, J. R. Okajima wrote: 
> Trond Myklebust:
> > OK. Does the following patch fix the deadlock for you?
> 
> Thanx for the patch, but it didn't work.
> 
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
> IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80

Oops... Yes, that needs to be fixed, and we should probably make
nfs_set_page_tag_locked() safe too.

How about the following? I can't seem to find any further dereferences
of req->wb_context after the nfs_clear_request().

Cheers
  Trond
------------------------------------------------------------------------------------------------------- 
NFS: Avoid a deadlock in nfs_release_page

From: Trond Myklebust <Trond.Myklebust@netapp.com>

J.R. Okajima reports the following deadlock:

INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0       D 0000000000000001     0   305      2 0x00000000
 ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
 ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
 ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
 [<ffffffff8146155d>] io_schedule+0x4d/0x70
 [<ffffffff810d2be5>] sync_page+0x65/0xa0
 [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
 [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
 [<ffffffff810d2b64>] __lock_page+0x64/0x70
 [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
 [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
 [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
 [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
 [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
 [<ffffffff8112bb88>] iput+0x78/0x80
 [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
 [<ffffffff811285f4>] dentry_iput+0x84/0x110
 [<ffffffff811286ae>] d_kill+0x2e/0x60
 [<ffffffff8112912a>] dput+0x7a/0x170
 [<ffffffff8111e925>] path_put+0x15/0x40
 [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
 [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
 [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
 [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
 [<ffffffff81234b7e>] kref_put+0x8e/0xe0
 [<ffffffff811cb594>] nfs_release_request+0x14/0x20
 [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
 [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
 [<ffffffff811c0770>] nfs_release_page+0x70/0x90
 [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
 [<ffffffff810e1178>] shrink_page_list+0x638/0x860
 [<ffffffff810e19de>] shrink_zone+0x63e/0xc40

We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---

 fs/nfs/pagelist.c |   23 +++++++++++++----------
 1 files changed, 13 insertions(+), 10 deletions(-)


diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..29d9d36 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
  */
 int nfs_set_page_tag_locked(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
-
 	if (!nfs_lock_request_dontget(req))
 		return 0;
 	if (req->wb_page != NULL)
-		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 1;
 }
 
@@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
  */
 void nfs_clear_page_tag_locked(struct nfs_page *req)
 {
-	struct inode *inode = req->wb_context->path.dentry->d_inode;
-	struct nfs_inode *nfsi = NFS_I(inode);
-
 	if (req->wb_page != NULL) {
+		struct inode *inode = req->wb_context->path.dentry->d_inode;
+		struct nfs_inode *nfsi = NFS_I(inode);
+
 		spin_lock(&inode->i_lock);
 		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 		nfs_unlock_request(req);
@@ -142,16 +140,22 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
  * nfs_clear_request - Free up all resources allocated to the request
  * @req:
  *
- * Release page resources associated with a write request after it
- * has completed.
+ * Release page and open context resources associated with a read/write
+ * request after it has completed.
  */
 void nfs_clear_request(struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
+	struct nfs_open_context *ctx = req->wb_context;
+
 	if (page != NULL) {
 		page_cache_release(page);
 		req->wb_page = NULL;
 	}
+	if (ctx != NULL) {
+		put_nfs_open_context(ctx);
+		req->wb_context = NULL;
+	}
 }
 

@@ -165,9 +169,8 @@ static void nfs_free_request(struct kref *kref)
 {
 	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
 
-	/* Release struct file or cached credential */
+	/* Release struct file and open context */
 	nfs_clear_request(req);
-	put_nfs_open_context(req->wb_context);
 	nfs_page_free(req);
 }
 


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
       [not found]                     ` <1268317582.3354.9.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-12  4:22                         ` J. R. Okajima
  2010-03-17 16:49                         ` Christoph Hellwig
  1 sibling, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-12  4:22 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro


Trond Myklebust:
> Oops... Yes, that needs to be fixed, and we should probably make
> nfs_set_page_tag_locked() safe too.
>
> How about the following? I can't seem to find any further dereferences
> of req->wb_context after the nfs_clear_request().

It passed all of my local tests successfully.
Thank you.


J. R. Okajima
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-12  4:22                         ` J. R. Okajima
  0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-12  4:22 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
	Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel, Christoph Hellwig, Al Viro


Trond Myklebust:
> Oops... Yes, that needs to be fixed, and we should probably make
> nfs_set_page_tag_locked() safe too.
>
> How about the following? I can't seem to find any further dereferences
> of req->wb_context after the nfs_clear_request().

It passed all of my local tests successfully.
Thank you.


J. R. Okajima

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
       [not found]                     ` <1268317582.3354.9.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-17 16:49                         ` Christoph Hellwig
  2010-03-17 16:49                         ` Christoph Hellwig
  1 sibling, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-03-17 16:49 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: J. R. Okajima, linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang,
	Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
	Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
>   */
>  int nfs_set_page_tag_locked(struct nfs_page *req)
>  {
> -	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> -
>  	if (!nfs_lock_request_dontget(req))
>  		return 0;
>  	if (req->wb_page != NULL)
> -		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> +		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
>  	return 1;
>  }
>  

This hunk is not only entirely unrealted to the fix, but also osbfucates
the code.

> @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
>   */
>  void nfs_clear_page_tag_locked(struct nfs_page *req)
>  {
> -	struct inode *inode = req->wb_context->path.dentry->d_inode;
> -	struct nfs_inode *nfsi = NFS_I(inode);
> -
>  	if (req->wb_page != NULL) {
> +		struct inode *inode = req->wb_context->path.dentry->d_inode;
> +		struct nfs_inode *nfsi = NFS_I(inode);
> +
>  		spin_lock(&inode->i_lock);
>  		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
>  		nfs_unlock_request(req);

Another one unrelated to the fix.

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-17 16:49                         ` Christoph Hellwig
  0 siblings, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-03-17 16:49 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: J. R. Okajima, linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara,
	Steve Rago, Jens Axboe, Peter Staubach, Arjan van de Ven,
	Ingo Molnar, linux-fsdevel, Christoph Hellwig, Al Viro

On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
>   */
>  int nfs_set_page_tag_locked(struct nfs_page *req)
>  {
> -	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> -
>  	if (!nfs_lock_request_dontget(req))
>  		return 0;
>  	if (req->wb_page != NULL)
> -		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> +		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
>  	return 1;
>  }
>  

This hunk is not only entirely unrealted to the fix, but also osbfucates
the code.

> @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
>   */
>  void nfs_clear_page_tag_locked(struct nfs_page *req)
>  {
> -	struct inode *inode = req->wb_context->path.dentry->d_inode;
> -	struct nfs_inode *nfsi = NFS_I(inode);
> -
>  	if (req->wb_page != NULL) {
> +		struct inode *inode = req->wb_context->path.dentry->d_inode;
> +		struct nfs_inode *nfsi = NFS_I(inode);
> +
>  		spin_lock(&inode->i_lock);
>  		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
>  		nfs_unlock_request(req);

Another one unrelated to the fix.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
  2010-03-17 16:49                         ` Christoph Hellwig
  (?)
@ 2010-03-17 17:26                         ` Trond Myklebust
  2010-03-17 17:52                           ` Jeff Layton
  -1 siblings, 1 reply; 48+ messages in thread
From: Trond Myklebust @ 2010-03-17 17:26 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: J. R. Okajima, linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara,
	Steve Rago, Jens Axboe, Peter Staubach, Arjan van de Ven,
	Ingo Molnar, linux-fsdevel, Christoph Hellwig, Al Viro

On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote: 
> On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> >   */
> >  int nfs_set_page_tag_locked(struct nfs_page *req)
> >  {
> > -	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > -
> >  	if (!nfs_lock_request_dontget(req))
> >  		return 0;
> >  	if (req->wb_page != NULL)
> > -		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > +		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> >  	return 1;
> >  }
> >  
> 
> This hunk is not only entirely unrealted to the fix, but also osbfucates
> the code.
> 
> > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> >   */
> >  void nfs_clear_page_tag_locked(struct nfs_page *req)
> >  {
> > -	struct inode *inode = req->wb_context->path.dentry->d_inode;
> > -	struct nfs_inode *nfsi = NFS_I(inode);
> > -
> >  	if (req->wb_page != NULL) {
> > +		struct inode *inode = req->wb_context->path.dentry->d_inode;
> > +		struct nfs_inode *nfsi = NFS_I(inode);
> > +
> >  		spin_lock(&inode->i_lock);
> >  		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> >  		nfs_unlock_request(req);
> 
> Another one unrelated to the fix.
> 

No. They are needed because we don't want to dereference req->wb_context
after it (and req->wb_page) have been cleared. Without these 2 hunks,
the resulting kernel Oopses.

Trond

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
  2010-03-17 17:26                         ` Trond Myklebust
@ 2010-03-17 17:52                           ` Jeff Layton
  2010-03-17 17:58                             ` Trond Myklebust
  0 siblings, 1 reply; 48+ messages in thread
From: Jeff Layton @ 2010-03-17 17:52 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. R. Okajima, linux-nfs, Wu Fengguang,
	Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Arjan van de Ven, Ingo Molnar, linux-fsdevel, Christoph Hellwig,
	Al Viro

On Wed, 17 Mar 2010 13:26:59 -0400
Trond Myklebust <Trond.Myklebust@netapp.com> wrote:

> On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote: 
> > On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> > >   */
> > >  int nfs_set_page_tag_locked(struct nfs_page *req)
> > >  {
> > > -	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > > -
> > >  	if (!nfs_lock_request_dontget(req))
> > >  		return 0;
> > >  	if (req->wb_page != NULL)
> > > -		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > +		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > >  	return 1;
> > >  }
> > >  
> > 
> > This hunk is not only entirely unrealted to the fix, but also osbfucates
> > the code.
> > 
> > > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> > >   */
> > >  void nfs_clear_page_tag_locked(struct nfs_page *req)
> > >  {
> > > -	struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > -	struct nfs_inode *nfsi = NFS_I(inode);
> > > -
> > >  	if (req->wb_page != NULL) {
> > > +		struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > +		struct nfs_inode *nfsi = NFS_I(inode);
> > > +
> > >  		spin_lock(&inode->i_lock);
> > >  		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > >  		nfs_unlock_request(req);
> > 
> > Another one unrelated to the fix.
> > 
> 
> No. They are needed because we don't want to dereference req->wb_context
> after it (and req->wb_page) have been cleared. Without these 2 hunks,
> the resulting kernel Oopses.
> 

It seems like that just tightens up the race window without actually
closing it.

Just because wb_page was non-NULL when you checked it gives no
guarantee that wb_context won't be NULL when you go to dereference it,
right?

-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
  2010-03-17 17:52                           ` Jeff Layton
@ 2010-03-17 17:58                             ` Trond Myklebust
       [not found]                               ` <1268848682.8335.5.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
  0 siblings, 1 reply; 48+ messages in thread
From: Trond Myklebust @ 2010-03-17 17:58 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Christoph Hellwig, J. R. Okajima, linux-nfs, Wu Fengguang,
	Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Arjan van de Ven, Ingo Molnar, linux-fsdevel, Christoph Hellwig,
	Al Viro

On Wed, 2010-03-17 at 13:52 -0400, Jeff Layton wrote: 
> On Wed, 17 Mar 2010 13:26:59 -0400
> Trond Myklebust <Trond.Myklebust@netapp.com> wrote:
> 
> > On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote: 
> > > On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > > > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> > > >   */
> > > >  int nfs_set_page_tag_locked(struct nfs_page *req)
> > > >  {
> > > > -	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > > > -
> > > >  	if (!nfs_lock_request_dontget(req))
> > > >  		return 0;
> > > >  	if (req->wb_page != NULL)
> > > > -		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > +		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > >  	return 1;
> > > >  }
> > > >  
> > > 
> > > This hunk is not only entirely unrealted to the fix, but also osbfucates
> > > the code.
> > > 
> > > > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> > > >   */
> > > >  void nfs_clear_page_tag_locked(struct nfs_page *req)
> > > >  {
> > > > -	struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > -	struct nfs_inode *nfsi = NFS_I(inode);
> > > > -
> > > >  	if (req->wb_page != NULL) {
> > > > +		struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > +		struct nfs_inode *nfsi = NFS_I(inode);
> > > > +
> > > >  		spin_lock(&inode->i_lock);
> > > >  		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > >  		nfs_unlock_request(req);
> > > 
> > > Another one unrelated to the fix.
> > > 
> > 
> > No. They are needed because we don't want to dereference req->wb_context
> > after it (and req->wb_page) have been cleared. Without these 2 hunks,
> > the resulting kernel Oopses.
> > 
> 
> It seems like that just tightens up the race window without actually
> closing it.
> 
> Just because wb_page was non-NULL when you checked it gives no
> guarantee that wb_context won't be NULL when you go to dereference it,
> right?
> 

The above 2 functions are the ones that lock and unlock the request.
Once locked by means of the call to nfs_lock_request_dontget(req), no
other threads can change the contents of wb_page and wb_context.

IOW: yes, there is definitely such a guarantee...

Trond

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
       [not found]                               ` <1268848682.8335.5.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-17 18:08                                   ` Jeff Layton
  0 siblings, 0 replies; 48+ messages in thread
From: Jeff Layton @ 2010-03-17 18:08 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. R. Okajima,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
	Jan Kara, Steve Rago, Jens Axboe, Arjan van de Ven, Ingo Molnar,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro

On Wed, 17 Mar 2010 13:58:02 -0400
Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org> wrote:

> On Wed, 2010-03-17 at 13:52 -0400, Jeff Layton wrote: 
> > On Wed, 17 Mar 2010 13:26:59 -0400
> > Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org> wrote:
> > 
> > > On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote: 
> > > > On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > > > > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> > > > >   */
> > > > >  int nfs_set_page_tag_locked(struct nfs_page *req)
> > > > >  {
> > > > > -	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > > > > -
> > > > >  	if (!nfs_lock_request_dontget(req))
> > > > >  		return 0;
> > > > >  	if (req->wb_page != NULL)
> > > > > -		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > > +		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > >  	return 1;
> > > > >  }
> > > > >  
> > > > 
> > > > This hunk is not only entirely unrealted to the fix, but also osbfucates
> > > > the code.
> > > > 
> > > > > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> > > > >   */
> > > > >  void nfs_clear_page_tag_locked(struct nfs_page *req)
> > > > >  {
> > > > > -	struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > > -	struct nfs_inode *nfsi = NFS_I(inode);
> > > > > -
> > > > >  	if (req->wb_page != NULL) {
> > > > > +		struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > > +		struct nfs_inode *nfsi = NFS_I(inode);
> > > > > +
> > > > >  		spin_lock(&inode->i_lock);
> > > > >  		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > >  		nfs_unlock_request(req);
> > > > 
> > > > Another one unrelated to the fix.
> > > > 
> > > 
> > > No. They are needed because we don't want to dereference req->wb_context
> > > after it (and req->wb_page) have been cleared. Without these 2 hunks,
> > > the resulting kernel Oopses.
> > > 
> > 
> > It seems like that just tightens up the race window without actually
> > closing it.
> > 
> > Just because wb_page was non-NULL when you checked it gives no
> > guarantee that wb_context won't be NULL when you go to dereference it,
> > right?
> > 
> 
> The above 2 functions are the ones that lock and unlock the request.
> Once locked by means of the call to nfs_lock_request_dontget(req), no
> other threads can change the contents of wb_page and wb_context.
> 
> IOW: yes, there is definitely such a guarantee...
> 

Ok, I think I see now.

Thanks,
-- 
Jeff Layton <jlayton-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-17 18:08                                   ` Jeff Layton
  0 siblings, 0 replies; 48+ messages in thread
From: Jeff Layton @ 2010-03-17 18:08 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Christoph Hellwig, J. R. Okajima, linux-nfs, Wu Fengguang,
	Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
	Arjan van de Ven, Ingo Molnar, linux-fsdevel, Christoph Hellwig,
	Al Viro

On Wed, 17 Mar 2010 13:58:02 -0400
Trond Myklebust <Trond.Myklebust@netapp.com> wrote:

> On Wed, 2010-03-17 at 13:52 -0400, Jeff Layton wrote: 
> > On Wed, 17 Mar 2010 13:26:59 -0400
> > Trond Myklebust <Trond.Myklebust@netapp.com> wrote:
> > 
> > > On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote: 
> > > > On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > > > > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> > > > >   */
> > > > >  int nfs_set_page_tag_locked(struct nfs_page *req)
> > > > >  {
> > > > > -	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > > > > -
> > > > >  	if (!nfs_lock_request_dontget(req))
> > > > >  		return 0;
> > > > >  	if (req->wb_page != NULL)
> > > > > -		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > > +		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > >  	return 1;
> > > > >  }
> > > > >  
> > > > 
> > > > This hunk is not only entirely unrealted to the fix, but also osbfucates
> > > > the code.
> > > > 
> > > > > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> > > > >   */
> > > > >  void nfs_clear_page_tag_locked(struct nfs_page *req)
> > > > >  {
> > > > > -	struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > > -	struct nfs_inode *nfsi = NFS_I(inode);
> > > > > -
> > > > >  	if (req->wb_page != NULL) {
> > > > > +		struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > > +		struct nfs_inode *nfsi = NFS_I(inode);
> > > > > +
> > > > >  		spin_lock(&inode->i_lock);
> > > > >  		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > >  		nfs_unlock_request(req);
> > > > 
> > > > Another one unrelated to the fix.
> > > > 
> > > 
> > > No. They are needed because we don't want to dereference req->wb_context
> > > after it (and req->wb_page) have been cleared. Without these 2 hunks,
> > > the resulting kernel Oopses.
> > > 
> > 
> > It seems like that just tightens up the race window without actually
> > closing it.
> > 
> > Just because wb_page was non-NULL when you checked it gives no
> > guarantee that wb_context won't be NULL when you go to dereference it,
> > right?
> > 
> 
> The above 2 functions are the ones that lock and unlock the request.
> Once locked by means of the call to nfs_lock_request_dontget(req), no
> other threads can change the contents of wb_page and wb_context.
> 
> IOW: yes, there is definitely such a guarantee...
> 

Ok, I think I see now.

Thanks,
-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2010-03-17 18:08 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-25 22:15 [PATCH 00/12] Re: [PATCH] improve the performance of large sequential write NFS workloads Trond Myklebust
2010-01-25 22:15 ` [PATCH 05/12] VM/NFS: The VM must tell the filesystem when to free reclaimable pages Trond Myklebust
     [not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-01-25 22:15   ` [PATCH 04/12] NFS: Reduce the number of unnecessary COMMIT calls Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 03/12] NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 06/12] NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode() Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
     [not found]     ` <20100125221545.16750.63968.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-01-26 11:21       ` Christoph Hellwig
2010-01-26 11:21         ` Christoph Hellwig
     [not found]         ` <20100126112148.GA25170-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2010-01-26 14:02           ` Trond Myklebust
2010-01-26 14:02             ` Trond Myklebust
2010-01-26 23:17           ` Trond Myklebust
2010-01-26 23:17             ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 07/12] NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 01/12] VM: Split out the accounting of unstable writes from BDI_RECLAIMABLE Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 02/12] VM: Don't call bdi_stat(BDI_UNSTABLE) on non-nfs backing-devices Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 08/12] NFS: Simplify nfs_wb_page_cancel() Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15   ` [PATCH 10/12] NFS: Simplify nfs_wb_page() Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
     [not found]     ` <20100125221545.16750.19154.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-03-10 18:51       ` J. R. Okajima
2010-03-10 18:51         ` J. R. Okajima
2010-03-10 19:31         ` Trond Myklebust
2010-03-10 19:31           ` Trond Myklebust
     [not found]           ` <1268249482.3096.76.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-03-10 20:18             ` Trond Myklebust
2010-03-10 20:18               ` Trond Myklebust
     [not found]               ` <1268252300.3096.81.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-03-11  4:45                 ` J. R. Okajima
2010-03-11  4:45                   ` J. R. Okajima
2010-03-11 14:26                   ` Trond Myklebust
2010-03-11 14:26                     ` Trond Myklebust
     [not found]                     ` <1268317582.3354.9.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-03-12  4:22                       ` J. R. Okajima
2010-03-12  4:22                         ` J. R. Okajima
2010-03-17 16:49                       ` Christoph Hellwig
2010-03-17 16:49                         ` Christoph Hellwig
2010-03-17 17:26                         ` Trond Myklebust
2010-03-17 17:52                           ` Jeff Layton
2010-03-17 17:58                             ` Trond Myklebust
     [not found]                               ` <1268848682.8335.5.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-03-17 18:08                                 ` Jeff Layton
2010-03-17 18:08                                   ` Jeff Layton
2010-01-25 22:15   ` [PATCH 12/12] NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping Trond Myklebust
2010-01-25 22:15     ` Trond Myklebust
2010-01-25 22:15 ` [PATCH 11/12] NFS: Clean up nfs_sync_mapping Trond Myklebust

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.