* [PATCH 04/12] NFS: Reduce the number of unnecessary COMMIT calls
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
If the caller is doing a non-blocking flush, and there are still writebacks
pending on the wire, we can usually defer the COMMIT call until those
writes are done.
Also ensure that we honour the wbc->nonblocking flag.
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/write.c | 17 ++++++++++++++---
1 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e2e7464..0bc1d5b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1409,12 +1409,23 @@ static int nfs_commit_inode(struct inode *inode, int how)
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
- int ret;
+ int flags = FLUSH_SYNC;
+ int ret = 0;
- ret = nfs_commit_inode(inode,
- wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+ /* Don't commit yet if this is a non-blocking flush and there are
+ * outstanding writes for this mapping.
+ */
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ radix_tree_tagged(&NFS_I(inode)->nfs_page_tree,
+ NFS_PAGE_TAG_LOCKED))
+ goto out_mark_dirty;
+
+ if (wbc->nonblocking)
+ flags = 0;
+ ret = nfs_commit_inode(inode, flags);
if (ret >= 0)
return 0;
+out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 04/12] NFS: Reduce the number of unnecessary COMMIT calls
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
If the caller is doing a non-blocking flush, and there are still writebacks
pending on the wire, we can usually defer the COMMIT call until those
writes are done.
Also ensure that we honour the wbc->nonblocking flag.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/write.c | 17 ++++++++++++++---
1 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e2e7464..0bc1d5b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1409,12 +1409,23 @@ static int nfs_commit_inode(struct inode *inode, int how)
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
- int ret;
+ int flags = FLUSH_SYNC;
+ int ret = 0;
- ret = nfs_commit_inode(inode,
- wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+ /* Don't commit yet if this is a non-blocking flush and there are
+ * outstanding writes for this mapping.
+ */
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ radix_tree_tagged(&NFS_I(inode)->nfs_page_tree,
+ NFS_PAGE_TAG_LOCKED))
+ goto out_mark_dirty;
+
+ if (wbc->nonblocking)
+ flags = 0;
+ ret = nfs_commit_inode(inode, flags);
if (ret >= 0)
return 0;
+out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
}
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 03/12] NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
The sole purpose of nfs_write_inode is to commit unstable writes, so
move it into fs/nfs/write.c, and make nfs_commit_inode static.
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/inode.c | 12 ------------
fs/nfs/write.c | 24 +++++++++++++++++++++++-
include/linux/nfs_fs.h | 7 -------
3 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 302aa89..8341709 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid)
return ino;
}
-int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- int ret;
-
- ret = nfs_commit_inode(inode,
- wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
- if (ret >= 0)
- return 0;
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return ret;
-}
-
void nfs_clear_inode(struct inode *inode)
{
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7ba56f8..e2e7464 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
.rpc_release = nfs_commit_release,
};
-int nfs_commit_inode(struct inode *inode, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
int res;
@@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how)
}
return res;
}
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+ int ret;
+
+ ret = nfs_commit_inode(inode,
+ wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+ if (ret >= 0)
+ return 0;
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ return ret;
+}
#else
static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
return 0;
}
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+ return 0;
+}
#endif
+int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ return nfs_commit_unstable_pages(inode, wbc);
+}
+
long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
{
struct inode *inode = mapping->host;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d09db1b..384ea3e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_write_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_write_data *wdata);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
- return 0;
-}
#endif
static inline int
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 03/12] NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
The sole purpose of nfs_write_inode is to commit unstable writes, so
move it into fs/nfs/write.c, and make nfs_commit_inode static.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/inode.c | 12 ------------
fs/nfs/write.c | 24 +++++++++++++++++++++++-
include/linux/nfs_fs.h | 7 -------
3 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 302aa89..8341709 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid)
return ino;
}
-int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- int ret;
-
- ret = nfs_commit_inode(inode,
- wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
- if (ret >= 0)
- return 0;
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return ret;
-}
-
void nfs_clear_inode(struct inode *inode)
{
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7ba56f8..e2e7464 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
.rpc_release = nfs_commit_release,
};
-int nfs_commit_inode(struct inode *inode, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
int res;
@@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how)
}
return res;
}
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+ int ret;
+
+ ret = nfs_commit_inode(inode,
+ wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+ if (ret >= 0)
+ return 0;
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ return ret;
+}
#else
static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
return 0;
}
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+ return 0;
+}
#endif
+int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ return nfs_commit_unstable_pages(inode, wbc);
+}
+
long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
{
struct inode *inode = mapping->host;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d09db1b..384ea3e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_write_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_write_data *wdata);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
- return 0;
-}
#endif
static inline int
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 06/12] NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Acked-by: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Acked-by: Wu Fengguang <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
fs/nfs/write.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9feafab..b0b3890 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1420,7 +1420,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
NFS_PAGE_TAG_LOCKED))
goto out_mark_dirty;
- if (wbc->nonblocking)
+ if (wbc->nonblocking || wbc->for_background)
flags = 0;
ret = nfs_commit_inode(inode, flags);
if (ret >= 0)
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 06/12] NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/nfs/write.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9feafab..b0b3890 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1420,7 +1420,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
NFS_PAGE_TAG_LOCKED))
goto out_mark_dirty;
- if (wbc->nonblocking)
+ if (wbc->nonblocking || wbc->for_background)
flags = 0;
ret = nfs_commit_inode(inode, flags);
if (ret >= 0)
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Now that we have correct COMMIT semantics in writeback_single_inode...
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/write.c | 39 ++++++---------------------------------
include/linux/nfs_fs.h | 1 -
2 files changed, 6 insertions(+), 34 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..747b40e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
pgoff_t idx_start, idx_end;
unsigned int npages = 0;
LIST_HEAD(head);
- int nocommit = how & FLUSH_NOCOMMIT;
long pages, ret;
/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
npages = 0;
}
}
- how &= ~FLUSH_NOCOMMIT;
spin_lock(&inode->i_lock);
do {
ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
if (ret != 0)
continue;
- if (nocommit)
- break;
pages = nfs_scan_commit(inode, &head, idx_start, npages);
if (pages == 0)
break;
@@ -1481,47 +1477,24 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
return ret;
}
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
- int ret;
-
- ret = nfs_writepages(mapping, wbc);
- if (ret < 0)
- goto out;
- ret = nfs_sync_mapping_wait(mapping, wbc, how);
- if (ret < 0)
- goto out;
- return 0;
-out:
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
{
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
};
- return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
- return nfs_write_mapping(inode->i_mapping, 0);
+ return sync_inode(inode, &wbc);
}
int nfs_wb_nocommit(struct inode *inode)
{
- return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+ return filemap_write_and_wait(inode->i_mapping);
}
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..c536caf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
#define FLUSH_STABLE 4 /* commit to stable storage */
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */
#ifdef __KERNEL__
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
Now that we have correct COMMIT semantics in writeback_single_inode...
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/write.c | 39 ++++++---------------------------------
include/linux/nfs_fs.h | 1 -
2 files changed, 6 insertions(+), 34 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..747b40e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
pgoff_t idx_start, idx_end;
unsigned int npages = 0;
LIST_HEAD(head);
- int nocommit = how & FLUSH_NOCOMMIT;
long pages, ret;
/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
npages = 0;
}
}
- how &= ~FLUSH_NOCOMMIT;
spin_lock(&inode->i_lock);
do {
ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
if (ret != 0)
continue;
- if (nocommit)
- break;
pages = nfs_scan_commit(inode, &head, idx_start, npages);
if (pages == 0)
break;
@@ -1481,47 +1477,24 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
return ret;
}
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
- int ret;
-
- ret = nfs_writepages(mapping, wbc);
- if (ret < 0)
- goto out;
- ret = nfs_sync_mapping_wait(mapping, wbc, how);
- if (ret < 0)
- goto out;
- return 0;
-out:
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
{
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
};
- return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
- return nfs_write_mapping(inode->i_mapping, 0);
+ return sync_inode(inode, &wbc);
}
int nfs_wb_nocommit(struct inode *inode)
{
- return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+ return filemap_write_and_wait(inode->i_mapping);
}
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..c536caf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
#define FLUSH_STABLE 4 /* commit to stable storage */
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */
#ifdef __KERNEL__
^ permalink raw reply related [flat|nested] 48+ messages in thread
[parent not found: <20100125221545.16750.63968.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
2010-01-25 22:15 ` Trond Myklebust
@ 2010-01-26 11:21 ` Christoph Hellwig
-1 siblings, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-01-26 11:21 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> int nfs_wb_nocommit(struct inode *inode)
> {
> - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> + return filemap_write_and_wait(inode->i_mapping);
Any point in keeping this as a wrapper for a single well-documented
caller? Also taking i_mutex around it seems a bit questionable these
days given that filemap_write_and_wait avoids lifelocks with writing
applications okay and we use it without i_mutex all over the place.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-26 11:21 ` Christoph Hellwig
0 siblings, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-01-26 11:21 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> int nfs_wb_nocommit(struct inode *inode)
> {
> - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> + return filemap_write_and_wait(inode->i_mapping);
Any point in keeping this as a wrapper for a single well-documented
caller? Also taking i_mutex around it seems a bit questionable these
days given that filemap_write_and_wait avoids lifelocks with writing
applications okay and we use it without i_mutex all over the place.
^ permalink raw reply [flat|nested] 48+ messages in thread
[parent not found: <20100126112148.GA25170-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>]
* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
2010-01-26 11:21 ` Christoph Hellwig
@ 2010-01-26 14:02 ` Trond Myklebust
-1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 14:02 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote:
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> > int nfs_wb_nocommit(struct inode *inode)
> > {
> > - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > + return filemap_write_and_wait(inode->i_mapping);
>
> Any point in keeping this as a wrapper for a single well-documented
> caller? Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
>
Agreed, but just out of curiosity. Is there any reason why we shouldn't
use filemap_flush() + filemap_fdatawait() here instead? We're not really
interested in doing a full data integrity flush, but just want to make
sure that pages which were dirtied before the stat() syscall are flushed
to disk so that the server updates the c/mtime for us.
Cheers
Trond
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-26 14:02 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 14:02 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote:
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> > int nfs_wb_nocommit(struct inode *inode)
> > {
> > - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > + return filemap_write_and_wait(inode->i_mapping);
>
> Any point in keeping this as a wrapper for a single well-documented
> caller? Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
>
Agreed, but just out of curiosity. Is there any reason why we shouldn't
use filemap_flush() + filemap_fdatawait() here instead? We're not really
interested in doing a full data integrity flush, but just want to make
sure that pages which were dirtied before the stat() syscall are flushed
to disk so that the server updates the c/mtime for us.
Cheers
Trond
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
2010-01-26 11:21 ` Christoph Hellwig
@ 2010-01-26 23:17 ` Trond Myklebust
-1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 23:17 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote:
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> > int nfs_wb_nocommit(struct inode *inode)
> > {
> > - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > + return filemap_write_and_wait(inode->i_mapping);
>
> Any point in keeping this as a wrapper for a single well-documented
> caller? Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
>
I'm replacing the former patch with the following updated version.
Cheers
Trond
---------------------------------------------------------------------------------------------
NFS: Replace __nfs_write_mapping with sync_inode()
From: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Now that we have correct COMMIT semantics in writeback_single_inode, we can
reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a
call to filemap_write_and_wait(), which doesn't need to hold the
inode->i_mutex.
With that done, we can eliminate nfs_write_mapping() altogether.
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/inode.c | 15 +++++----------
fs/nfs/write.c | 42 +++++-------------------------------------
include/linux/nfs_fs.h | 2 --
3 files changed, 10 insertions(+), 49 deletions(-)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8341709..733cb27 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
int err;
- /*
- * Flush out writes to the server in order to update c/mtime.
- *
- * Hold the i_mutex to suspend application writes temporarily;
- * this prevents long-running writing applications from blocking
- * nfs_wb_nocommit.
- */
+ /* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
- nfs_wb_nocommit(inode);
- mutex_unlock(&inode->i_mutex);
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto out;
}
/*
@@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
}
+out:
return err;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..c28cf57 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
pgoff_t idx_start, idx_end;
unsigned int npages = 0;
LIST_HEAD(head);
- int nocommit = how & FLUSH_NOCOMMIT;
long pages, ret;
/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
npages = 0;
}
}
- how &= ~FLUSH_NOCOMMIT;
spin_lock(&inode->i_lock);
do {
ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
if (ret != 0)
continue;
- if (nocommit)
- break;
pages = nfs_scan_commit(inode, &head, idx_start, npages);
if (pages == 0)
break;
@@ -1481,47 +1477,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
return ret;
}
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
- int ret;
-
- ret = nfs_writepages(mapping, wbc);
- if (ret < 0)
- goto out;
- ret = nfs_sync_mapping_wait(mapping, wbc, how);
- if (ret < 0)
- goto out;
- return 0;
-out:
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
{
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
};
- return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
- return nfs_write_mapping(inode->i_mapping, 0);
-}
-
-int nfs_wb_nocommit(struct inode *inode)
-{
- return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+ return sync_inode(inode, &wbc);
}
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..3383622 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
#define FLUSH_STABLE 4 /* commit to stable storage */
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */
#ifdef __KERNEL__
@@ -477,7 +476,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
*/
extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_nocommit(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH 09/12] NFS: Replace __nfs_write_mapping with sync_inode()
@ 2010-01-26 23:17 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-26 23:17 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
On Tue, 2010-01-26 at 06:21 -0500, Christoph Hellwig wrote:
> On Mon, Jan 25, 2010 at 05:15:45PM -0500, Trond Myklebust wrote:
> > int nfs_wb_nocommit(struct inode *inode)
> > {
> > - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
> > + return filemap_write_and_wait(inode->i_mapping);
>
> Any point in keeping this as a wrapper for a single well-documented
> caller? Also taking i_mutex around it seems a bit questionable these
> days given that filemap_write_and_wait avoids lifelocks with writing
> applications okay and we use it without i_mutex all over the place.
>
I'm replacing the former patch with the following updated version.
Cheers
Trond
---------------------------------------------------------------------------------------------
NFS: Replace __nfs_write_mapping with sync_inode()
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Now that we have correct COMMIT semantics in writeback_single_inode, we can
reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a
call to filemap_write_and_wait(), which doesn't need to hold the
inode->i_mutex.
With that done, we can eliminate nfs_write_mapping() altogether.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/inode.c | 15 +++++----------
fs/nfs/write.c | 42 +++++-------------------------------------
include/linux/nfs_fs.h | 2 --
3 files changed, 10 insertions(+), 49 deletions(-)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8341709..733cb27 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
int err;
- /*
- * Flush out writes to the server in order to update c/mtime.
- *
- * Hold the i_mutex to suspend application writes temporarily;
- * this prevents long-running writing applications from blocking
- * nfs_wb_nocommit.
- */
+ /* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
- nfs_wb_nocommit(inode);
- mutex_unlock(&inode->i_mutex);
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto out;
}
/*
@@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
}
+out:
return err;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53df027..c28cf57 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1443,7 +1443,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
pgoff_t idx_start, idx_end;
unsigned int npages = 0;
LIST_HEAD(head);
- int nocommit = how & FLUSH_NOCOMMIT;
long pages, ret;
/* FIXME */
@@ -1460,14 +1459,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
npages = 0;
}
}
- how &= ~FLUSH_NOCOMMIT;
spin_lock(&inode->i_lock);
do {
ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
if (ret != 0)
continue;
- if (nocommit)
- break;
pages = nfs_scan_commit(inode, &head, idx_start, npages);
if (pages == 0)
break;
@@ -1481,47 +1477,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
return ret;
}
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
- int ret;
-
- ret = nfs_writepages(mapping, wbc);
- if (ret < 0)
- goto out;
- ret = nfs_sync_mapping_wait(mapping, wbc, how);
- if (ret < 0)
- goto out;
- return 0;
-out:
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
{
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
};
- return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
- return nfs_write_mapping(inode->i_mapping, 0);
-}
-
-int nfs_wb_nocommit(struct inode *inode)
-{
- return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+ return sync_inode(inode, &wbc);
}
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1eec414..3383622 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
#define FLUSH_STABLE 4 /* commit to stable storage */
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */
#ifdef __KERNEL__
@@ -477,7 +476,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
*/
extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_nocommit(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 07/12] NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Since nfs_scan_list() doesn't wait for locked pages, we have a race in
which it is possible to end up with an inode that needs to send a COMMIT,
but which does not have the I_DIRTY_DATASYNC flag set.
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/write.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b0b3890..2d78f08 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -573,11 +573,15 @@ static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int ret;
if (!nfs_need_commit(nfsi))
return 0;
- return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+ ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+ if (nfs_need_commit(NFS_I(inode)))
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ return ret;
}
#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 07/12] NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
Since nfs_scan_list() doesn't wait for locked pages, we have a race in
which it is possible to end up with an inode that needs to send a COMMIT,
but which does not have the I_DIRTY_DATASYNC flag set.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/write.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b0b3890..2d78f08 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -573,11 +573,15 @@ static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int ret;
if (!nfs_need_commit(nfsi))
return 0;
- return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+ ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+ if (nfs_need_commit(NFS_I(inode)))
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ return ret;
}
#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 01/12] VM: Split out the accounting of unstable writes from BDI_RECLAIMABLE
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
From: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Signed-off-by: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Acked-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
Reviewed-by: Wu Fengguang <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
fs/nfs/write.c | 6 +++---
include/linux/backing-dev.h | 3 ++-
mm/backing-dev.c | 6 ++++--
mm/filemap.c | 2 +-
mm/page-writeback.c | 16 ++++++++++------
mm/truncate.c | 2 +-
6 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696..7ba56f8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -440,7 +440,7 @@ nfs_mark_request_commit(struct nfs_page *req)
NFS_PAGE_TAG_COMMIT);
spin_unlock(&inode->i_lock);
inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+ inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_UNSTABLE);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
@@ -451,7 +451,7 @@ nfs_clear_request_commit(struct nfs_page *req)
if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
dec_zone_page_state(page, NR_UNSTABLE_NFS);
- dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+ dec_bdi_stat(page->mapping->backing_dev_info, BDI_UNSTABLE);
return 1;
}
return 0;
@@ -1322,7 +1322,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
nfs_mark_request_commit(req);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ BDI_UNSTABLE);
nfs_clear_page_tag_locked(req);
}
return -ENOMEM;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index fcbc26a..42c3e2a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -36,7 +36,8 @@ enum bdi_state {
typedef int (congested_fn)(void *, int);
enum bdi_stat_item {
- BDI_RECLAIMABLE,
+ BDI_DIRTY,
+ BDI_UNSTABLE,
BDI_WRITEBACK,
NR_BDI_STAT_ITEMS
};
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..88f3655 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -88,7 +88,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
#define K(x) ((x) << (PAGE_SHIFT - 10))
seq_printf(m,
"BdiWriteback: %8lu kB\n"
- "BdiReclaimable: %8lu kB\n"
+ "BdiDirty: %8lu kB\n"
+ "BdiUnstable: %8lu kB\n"
"BdiDirtyThresh: %8lu kB\n"
"DirtyThresh: %8lu kB\n"
"BackgroundThresh: %8lu kB\n"
@@ -102,7 +103,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"wb_list: %8u\n"
"wb_cnt: %8u\n",
(unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
- (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
+ (unsigned long) K(bdi_stat(bdi, BDI_DIRTY)),
+ (unsigned long) K(bdi_stat(bdi, BDI_UNSTABLE)),
K(bdi_thresh), K(dirty_thresh),
K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
!list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0..458387d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -136,7 +136,7 @@ void __remove_from_page_cache(struct page *page)
*/
if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
- dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
}
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b19943..23d3fc6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -272,7 +272,8 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
else
avail_dirty = 0;
- avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
+ avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
+ bdi_stat(bdi, BDI_UNSTABLE) +
bdi_stat(bdi, BDI_WRITEBACK);
*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
@@ -509,7 +510,8 @@ static void balance_dirty_pages(struct address_space *mapping,
global_page_state(NR_UNSTABLE_NFS);
nr_writeback = global_page_state(NR_WRITEBACK);
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+ bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -554,10 +556,12 @@ static void balance_dirty_pages(struct address_space *mapping,
* deltas.
*/
if (bdi_thresh < 2*bdi_stat_error(bdi)) {
- bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
+ bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+ bdi_stat_sum(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
} else if (bdi_nr_reclaimable) {
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+ bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
}
@@ -1079,7 +1083,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
{
if (mapping_cap_account_dirty(mapping)) {
__inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
task_dirty_inc(current);
task_io_account_write(PAGE_CACHE_SIZE);
}
@@ -1255,7 +1259,7 @@ int clear_page_dirty_for_io(struct page *page)
if (TestClearPageDirty(page)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ BDI_DIRTY);
return 1;
}
return 0;
diff --git a/mm/truncate.c b/mm/truncate.c
index e87e372..2466e0c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -75,7 +75,7 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
if (mapping && mapping_cap_account_dirty(mapping)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ BDI_DIRTY);
if (account_size)
task_io_account_cancelled_write(account_size);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 01/12] VM: Split out the accounting of unstable writes from BDI_RECLAIMABLE
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
From: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/nfs/write.c | 6 +++---
include/linux/backing-dev.h | 3 ++-
mm/backing-dev.c | 6 ++++--
mm/filemap.c | 2 +-
mm/page-writeback.c | 16 ++++++++++------
mm/truncate.c | 2 +-
6 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696..7ba56f8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -440,7 +440,7 @@ nfs_mark_request_commit(struct nfs_page *req)
NFS_PAGE_TAG_COMMIT);
spin_unlock(&inode->i_lock);
inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+ inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_UNSTABLE);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
@@ -451,7 +451,7 @@ nfs_clear_request_commit(struct nfs_page *req)
if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
dec_zone_page_state(page, NR_UNSTABLE_NFS);
- dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+ dec_bdi_stat(page->mapping->backing_dev_info, BDI_UNSTABLE);
return 1;
}
return 0;
@@ -1322,7 +1322,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
nfs_mark_request_commit(req);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ BDI_UNSTABLE);
nfs_clear_page_tag_locked(req);
}
return -ENOMEM;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index fcbc26a..42c3e2a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -36,7 +36,8 @@ enum bdi_state {
typedef int (congested_fn)(void *, int);
enum bdi_stat_item {
- BDI_RECLAIMABLE,
+ BDI_DIRTY,
+ BDI_UNSTABLE,
BDI_WRITEBACK,
NR_BDI_STAT_ITEMS
};
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca03..88f3655 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -88,7 +88,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
#define K(x) ((x) << (PAGE_SHIFT - 10))
seq_printf(m,
"BdiWriteback: %8lu kB\n"
- "BdiReclaimable: %8lu kB\n"
+ "BdiDirty: %8lu kB\n"
+ "BdiUnstable: %8lu kB\n"
"BdiDirtyThresh: %8lu kB\n"
"DirtyThresh: %8lu kB\n"
"BackgroundThresh: %8lu kB\n"
@@ -102,7 +103,8 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"wb_list: %8u\n"
"wb_cnt: %8u\n",
(unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
- (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
+ (unsigned long) K(bdi_stat(bdi, BDI_DIRTY)),
+ (unsigned long) K(bdi_stat(bdi, BDI_UNSTABLE)),
K(bdi_thresh), K(dirty_thresh),
K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
!list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0..458387d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -136,7 +136,7 @@ void __remove_from_page_cache(struct page *page)
*/
if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
- dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
}
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b19943..23d3fc6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -272,7 +272,8 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
else
avail_dirty = 0;
- avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
+ avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
+ bdi_stat(bdi, BDI_UNSTABLE) +
bdi_stat(bdi, BDI_WRITEBACK);
*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
@@ -509,7 +510,8 @@ static void balance_dirty_pages(struct address_space *mapping,
global_page_state(NR_UNSTABLE_NFS);
nr_writeback = global_page_state(NR_WRITEBACK);
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+ bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -554,10 +556,12 @@ static void balance_dirty_pages(struct address_space *mapping,
* deltas.
*/
if (bdi_thresh < 2*bdi_stat_error(bdi)) {
- bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
+ bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+ bdi_stat_sum(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
} else if (bdi_nr_reclaimable) {
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+ bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
}
@@ -1079,7 +1083,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
{
if (mapping_cap_account_dirty(mapping)) {
__inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTY);
task_dirty_inc(current);
task_io_account_write(PAGE_CACHE_SIZE);
}
@@ -1255,7 +1259,7 @@ int clear_page_dirty_for_io(struct page *page)
if (TestClearPageDirty(page)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ BDI_DIRTY);
return 1;
}
return 0;
diff --git a/mm/truncate.c b/mm/truncate.c
index e87e372..2466e0c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -75,7 +75,7 @@ void cancel_dirty_page(struct page *page, unsigned int account_size)
if (mapping && mapping_cap_account_dirty(mapping)) {
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ BDI_DIRTY);
if (account_size)
task_io_account_cancelled_write(account_size);
}
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 02/12] VM: Don't call bdi_stat(BDI_UNSTABLE) on non-nfs backing-devices
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Speeds up the accounting in balance_dirty_pages() for non-nfs devices.
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Acked-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
Acked-by: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Reviewed-by: Wu Fengguang <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
fs/nfs/client.c | 1 +
include/linux/backing-dev.h | 6 ++++++
mm/page-writeback.c | 16 +++++++++++-----
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee77713..d0b060a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -890,6 +890,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
server->backing_dev_info.name = "nfs";
server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+ server->backing_dev_info.capabilities |= BDI_CAP_ACCT_UNSTABLE;
if (server->wsize > max_rpc_payload)
server->wsize = max_rpc_payload;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 42c3e2a..8b45166 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -232,6 +232,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
#define BDI_CAP_EXEC_MAP 0x00000040
#define BDI_CAP_NO_ACCT_WB 0x00000080
#define BDI_CAP_SWAP_BACKED 0x00000100
+#define BDI_CAP_ACCT_UNSTABLE 0x00000200
#define BDI_CAP_VMFLAGS \
(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -311,6 +312,11 @@ static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
return bdi == &default_backing_dev_info;
}
+static inline bool bdi_cap_account_unstable(struct backing_dev_info *bdi)
+{
+ return bdi->capabilities & BDI_CAP_ACCT_UNSTABLE;
+}
+
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 23d3fc6..c06739b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -273,8 +273,9 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
avail_dirty = 0;
avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
- bdi_stat(bdi, BDI_UNSTABLE) +
bdi_stat(bdi, BDI_WRITEBACK);
+ if (bdi_cap_account_unstable(bdi))
+ avail_dirty += bdi_stat(bdi, BDI_UNSTABLE);
*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
}
@@ -510,8 +511,9 @@ static void balance_dirty_pages(struct address_space *mapping,
global_page_state(NR_UNSTABLE_NFS);
nr_writeback = global_page_state(NR_WRITEBACK);
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
- bdi_stat(bdi, BDI_UNSTABLE);
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -556,11 +558,15 @@ static void balance_dirty_pages(struct address_space *mapping,
* deltas.
*/
if (bdi_thresh < 2*bdi_stat_error(bdi)) {
- bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+ bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable +=
bdi_stat_sum(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
} else if (bdi_nr_reclaimable) {
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable +=
bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 02/12] VM: Don't call bdi_stat(BDI_UNSTABLE) on non-nfs backing-devices
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
Speeds up the accounting in balance_dirty_pages() for non-nfs devices.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
---
fs/nfs/client.c | 1 +
include/linux/backing-dev.h | 6 ++++++
mm/page-writeback.c | 16 +++++++++++-----
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee77713..d0b060a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -890,6 +890,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
server->backing_dev_info.name = "nfs";
server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+ server->backing_dev_info.capabilities |= BDI_CAP_ACCT_UNSTABLE;
if (server->wsize > max_rpc_payload)
server->wsize = max_rpc_payload;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 42c3e2a..8b45166 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -232,6 +232,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
#define BDI_CAP_EXEC_MAP 0x00000040
#define BDI_CAP_NO_ACCT_WB 0x00000080
#define BDI_CAP_SWAP_BACKED 0x00000100
+#define BDI_CAP_ACCT_UNSTABLE 0x00000200
#define BDI_CAP_VMFLAGS \
(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -311,6 +312,11 @@ static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
return bdi == &default_backing_dev_info;
}
+static inline bool bdi_cap_account_unstable(struct backing_dev_info *bdi)
+{
+ return bdi->capabilities & BDI_CAP_ACCT_UNSTABLE;
+}
+
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 23d3fc6..c06739b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -273,8 +273,9 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
avail_dirty = 0;
avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
- bdi_stat(bdi, BDI_UNSTABLE) +
bdi_stat(bdi, BDI_WRITEBACK);
+ if (bdi_cap_account_unstable(bdi))
+ avail_dirty += bdi_stat(bdi, BDI_UNSTABLE);
*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
}
@@ -510,8 +511,9 @@ static void balance_dirty_pages(struct address_space *mapping,
global_page_state(NR_UNSTABLE_NFS);
nr_writeback = global_page_state(NR_WRITEBACK);
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
- bdi_stat(bdi, BDI_UNSTABLE);
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -556,11 +558,15 @@ static void balance_dirty_pages(struct address_space *mapping,
* deltas.
*/
if (bdi_thresh < 2*bdi_stat_error(bdi)) {
- bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+ bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable +=
bdi_stat_sum(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
} else if (bdi_nr_reclaimable) {
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable +=
bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
}
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 08/12] NFS: Simplify nfs_wb_page_cancel()
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
In all cases we should be able to just remove the request and call
cancel_dirty_page().
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/write.c | 39 +--------------------------------------
include/linux/nfs_fs.h | 2 --
2 files changed, 1 insertions(+), 40 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2d78f08..53df027 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -539,19 +539,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
return res;
}
-static void nfs_cancel_commit_list(struct list_head *head)
-{
- struct nfs_page *req;
-
- while(!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_clear_request_commit(req);
- nfs_inode_remove_request(req);
- nfs_unlock_request(req);
- }
-}
-
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
@@ -1484,13 +1471,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
pages = nfs_scan_commit(inode, &head, idx_start, npages);
if (pages == 0)
break;
- if (how & FLUSH_INVALIDATE) {
- spin_unlock(&inode->i_lock);
- nfs_cancel_commit_list(&head);
- ret = pages;
- spin_lock(&inode->i_lock);
- continue;
- }
pages += nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&inode->i_lock);
ret = nfs_commit_list(inode, &head, how);
@@ -1547,26 +1527,13 @@ int nfs_wb_nocommit(struct inode *inode)
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
struct nfs_page *req;
- loff_t range_start = page_offset(page);
- loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
- struct writeback_control wbc = {
- .bdi = page->mapping->backing_dev_info,
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .range_start = range_start,
- .range_end = range_end,
- };
int ret = 0;
BUG_ON(!PageLocked(page));
for (;;) {
req = nfs_page_find_request(page);
if (req == NULL)
- goto out;
- if (test_bit(PG_CLEAN, &req->wb_flags)) {
- nfs_release_request(req);
break;
- }
if (nfs_lock_request_dontget(req)) {
nfs_inode_remove_request(req);
/*
@@ -1579,12 +1546,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
}
ret = nfs_wait_on_request(req);
if (ret < 0)
- goto out;
+ break;
}
- if (!PagePrivate(page))
- return 0;
- ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
-out:
return ret;
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 384ea3e..1eec414 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -34,8 +34,6 @@
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */
-#define FLUSH_INVALIDATE 64 /* Invalidate the page cache */
-#define FLUSH_NOWRITEPAGE 128 /* Don't call writepage() */
#ifdef __KERNEL__
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 08/12] NFS: Simplify nfs_wb_page_cancel()
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
In all cases we should be able to just remove the request and call
cancel_dirty_page().
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/write.c | 39 +--------------------------------------
include/linux/nfs_fs.h | 2 --
2 files changed, 1 insertions(+), 40 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2d78f08..53df027 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -539,19 +539,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
return res;
}
-static void nfs_cancel_commit_list(struct list_head *head)
-{
- struct nfs_page *req;
-
- while(!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_clear_request_commit(req);
- nfs_inode_remove_request(req);
- nfs_unlock_request(req);
- }
-}
-
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
@@ -1484,13 +1471,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
pages = nfs_scan_commit(inode, &head, idx_start, npages);
if (pages == 0)
break;
- if (how & FLUSH_INVALIDATE) {
- spin_unlock(&inode->i_lock);
- nfs_cancel_commit_list(&head);
- ret = pages;
- spin_lock(&inode->i_lock);
- continue;
- }
pages += nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&inode->i_lock);
ret = nfs_commit_list(inode, &head, how);
@@ -1547,26 +1527,13 @@ int nfs_wb_nocommit(struct inode *inode)
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
struct nfs_page *req;
- loff_t range_start = page_offset(page);
- loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
- struct writeback_control wbc = {
- .bdi = page->mapping->backing_dev_info,
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .range_start = range_start,
- .range_end = range_end,
- };
int ret = 0;
BUG_ON(!PageLocked(page));
for (;;) {
req = nfs_page_find_request(page);
if (req == NULL)
- goto out;
- if (test_bit(PG_CLEAN, &req->wb_flags)) {
- nfs_release_request(req);
break;
- }
if (nfs_lock_request_dontget(req)) {
nfs_inode_remove_request(req);
/*
@@ -1579,12 +1546,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
}
ret = nfs_wait_on_request(req);
if (ret < 0)
- goto out;
+ break;
}
- if (!PagePrivate(page))
- return 0;
- ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
-out:
return ret;
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 384ea3e..1eec414 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -34,8 +34,6 @@
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */
-#define FLUSH_INVALIDATE 64 /* Invalidate the page cache */
-#define FLUSH_NOWRITEPAGE 128 /* Don't call writepage() */
#ifdef __KERNEL__
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 10/12] NFS: Simplify nfs_wb_page()
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/write.c | 118 +++++++++---------------------------------------
include/linux/nfs_fs.h | 1
2 files changed, 22 insertions(+), 97 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 747b40e..830613d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -501,44 +501,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
}
#endif
-/*
- * Wait for a request to complete.
- *
- * Interruptible by fatal signals only.
- */
-static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_page *req;
- pgoff_t idx_end, next;
- unsigned int res = 0;
- int error;
-
- if (npages == 0)
- idx_end = ~0;
- else
- idx_end = idx_start + npages - 1;
-
- next = idx_start;
- while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
- if (req->wb_index > idx_end)
- break;
-
- next = req->wb_index + 1;
- BUG_ON(!NFS_WBACK_BUSY(req));
-
- kref_get(&req->wb_kref);
- spin_unlock(&inode->i_lock);
- error = nfs_wait_on_request(req);
- nfs_release_request(req);
- spin_lock(&inode->i_lock);
- if (error < 0)
- return error;
- res++;
- }
- return res;
-}
-
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
@@ -1437,46 +1399,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
return nfs_commit_unstable_pages(inode, wbc);
}
-long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
- struct inode *inode = mapping->host;
- pgoff_t idx_start, idx_end;
- unsigned int npages = 0;
- LIST_HEAD(head);
- long pages, ret;
-
- /* FIXME */
- if (wbc->range_cyclic)
- idx_start = 0;
- else {
- idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
- idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (idx_end > idx_start) {
- pgoff_t l_npages = 1 + idx_end - idx_start;
- npages = l_npages;
- if (sizeof(npages) != sizeof(l_npages) &&
- (pgoff_t)npages != l_npages)
- npages = 0;
- }
- }
- spin_lock(&inode->i_lock);
- do {
- ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
- if (ret != 0)
- continue;
- pages = nfs_scan_commit(inode, &head, idx_start, npages);
- if (pages == 0)
- break;
- pages += nfs_scan_commit(inode, &head, 0, 0);
- spin_unlock(&inode->i_lock);
- ret = nfs_commit_list(inode, &head, how);
- spin_lock(&inode->i_lock);
-
- } while (ret >= 0);
- spin_unlock(&inode->i_lock);
- return ret;
-}
-
/*
* flush the inode to disk.
*/
@@ -1524,45 +1446,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
return ret;
}
-static int nfs_wb_page_priority(struct inode *inode, struct page *page,
- int how)
+/*
+ * Write back all requests on one page - we do this before reading it.
+ */
+int nfs_wb_page(struct inode *inode, struct page *page)
{
loff_t range_start = page_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
struct writeback_control wbc = {
- .bdi = page->mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
+ .nr_to_write = 0,
.range_start = range_start,
.range_end = range_end,
};
+ struct nfs_page *req;
+ int need_commit;
int ret;
- do {
+ while(PagePrivate(page)) {
if (clear_page_dirty_for_io(page)) {
ret = nfs_writepage_locked(page, &wbc);
if (ret < 0)
goto out_error;
- } else if (!PagePrivate(page))
+ }
+ req = nfs_find_and_lock_request(page);
+ if (!req)
break;
- ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
- if (ret < 0)
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
goto out_error;
- } while (PagePrivate(page));
+ }
+ need_commit = test_bit(PG_CLEAN, &req->wb_flags);
+ nfs_clear_page_tag_locked(req);
+ if (need_commit) {
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (ret < 0)
+ goto out_error;
+ }
+ }
return 0;
out_error:
- __mark_inode_dirty(inode, I_DIRTY_PAGES);
return ret;
}
-/*
- * Write back all requests on one page - we do this before reading it.
- */
-int nfs_wb_page(struct inode *inode, struct page* page)
-{
- return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
-}
-
#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c536caf..6ee74c5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -474,7 +474,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
* Try to write back everything synchronously (but check the
* return value!)
*/
-extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
extern int nfs_wb_all(struct inode *inode);
extern int nfs_wb_nocommit(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page);
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/write.c | 118 +++++++++---------------------------------------
include/linux/nfs_fs.h | 1
2 files changed, 22 insertions(+), 97 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 747b40e..830613d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -501,44 +501,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
}
#endif
-/*
- * Wait for a request to complete.
- *
- * Interruptible by fatal signals only.
- */
-static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_page *req;
- pgoff_t idx_end, next;
- unsigned int res = 0;
- int error;
-
- if (npages == 0)
- idx_end = ~0;
- else
- idx_end = idx_start + npages - 1;
-
- next = idx_start;
- while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
- if (req->wb_index > idx_end)
- break;
-
- next = req->wb_index + 1;
- BUG_ON(!NFS_WBACK_BUSY(req));
-
- kref_get(&req->wb_kref);
- spin_unlock(&inode->i_lock);
- error = nfs_wait_on_request(req);
- nfs_release_request(req);
- spin_lock(&inode->i_lock);
- if (error < 0)
- return error;
- res++;
- }
- return res;
-}
-
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
@@ -1437,46 +1399,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
return nfs_commit_unstable_pages(inode, wbc);
}
-long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
- struct inode *inode = mapping->host;
- pgoff_t idx_start, idx_end;
- unsigned int npages = 0;
- LIST_HEAD(head);
- long pages, ret;
-
- /* FIXME */
- if (wbc->range_cyclic)
- idx_start = 0;
- else {
- idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
- idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (idx_end > idx_start) {
- pgoff_t l_npages = 1 + idx_end - idx_start;
- npages = l_npages;
- if (sizeof(npages) != sizeof(l_npages) &&
- (pgoff_t)npages != l_npages)
- npages = 0;
- }
- }
- spin_lock(&inode->i_lock);
- do {
- ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
- if (ret != 0)
- continue;
- pages = nfs_scan_commit(inode, &head, idx_start, npages);
- if (pages == 0)
- break;
- pages += nfs_scan_commit(inode, &head, 0, 0);
- spin_unlock(&inode->i_lock);
- ret = nfs_commit_list(inode, &head, how);
- spin_lock(&inode->i_lock);
-
- } while (ret >= 0);
- spin_unlock(&inode->i_lock);
- return ret;
-}
-
/*
* flush the inode to disk.
*/
@@ -1524,45 +1446,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
return ret;
}
-static int nfs_wb_page_priority(struct inode *inode, struct page *page,
- int how)
+/*
+ * Write back all requests on one page - we do this before reading it.
+ */
+int nfs_wb_page(struct inode *inode, struct page *page)
{
loff_t range_start = page_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
struct writeback_control wbc = {
- .bdi = page->mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
+ .nr_to_write = 0,
.range_start = range_start,
.range_end = range_end,
};
+ struct nfs_page *req;
+ int need_commit;
int ret;
- do {
+ while(PagePrivate(page)) {
if (clear_page_dirty_for_io(page)) {
ret = nfs_writepage_locked(page, &wbc);
if (ret < 0)
goto out_error;
- } else if (!PagePrivate(page))
+ }
+ req = nfs_find_and_lock_request(page);
+ if (!req)
break;
- ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
- if (ret < 0)
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
goto out_error;
- } while (PagePrivate(page));
+ }
+ need_commit = test_bit(PG_CLEAN, &req->wb_flags);
+ nfs_clear_page_tag_locked(req);
+ if (need_commit) {
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (ret < 0)
+ goto out_error;
+ }
+ }
return 0;
out_error:
- __mark_inode_dirty(inode, I_DIRTY_PAGES);
return ret;
}
-/*
- * Write back all requests on one page - we do this before reading it.
- */
-int nfs_wb_page(struct inode *inode, struct page* page)
-{
- return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
-}
-
#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c536caf..6ee74c5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -474,7 +474,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
* Try to write back everything synchronously (but check the
* return value!)
*/
-extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
extern int nfs_wb_all(struct inode *inode);
extern int nfs_wb_nocommit(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page);
^ permalink raw reply related [flat|nested] 48+ messages in thread
[parent not found: <20100125221545.16750.19154.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
[not found] ` <20100125221545.16750.19154.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-10 18:51 ` J. R. Okajima
0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-10 18:51 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Trond Myklebust:
> -static int nfs_wb_page_priority(struct inode *inode, struct page *page,
> - int how)
> +/*
> + * Write back all requests on one page - we do this before reading it.
> + */
> +int nfs_wb_page(struct inode *inode, struct page *page)
> {
:::
> - do {
> + while(PagePrivate(page)) {
> if (clear_page_dirty_for_io(page)) {
> ret = nfs_writepage_locked(page, &wbc);
> if (ret < 0)
> goto out_error;
> - } else if (!PagePrivate(page))
> + }
> + req = nfs_find_and_lock_request(page);
> + if (!req)
> break;
> - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
> - if (ret < 0)
> + if (IS_ERR(req)) {
> + ret = PTR_ERR(req);
> goto out_error;
:::
Hello Trond,
I am unsure whether this nfs_find_and_lock_request() call is correct or
not, but it brings me a problem.
This call trace in "kswapd blocked for more than brabra" message shows
that generic_delete_inode() blocked.
Is this put_nfs_open_context() -- iput() call in shrink_page_list()
context intended?
If not, I'd suggest a patch like this.
INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0 D 0000000000000001 0 305 2 0x00000000
ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
[<ffffffff8146155d>] io_schedule+0x4d/0x70
[<ffffffff810d2be5>] sync_page+0x65/0xa0
[<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
[<ffffffff810d2b80>] ? sync_page+0x0/0xa0
[<ffffffff810d2b64>] __lock_page+0x64/0x70
[<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
[<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
[<ffffffff810df340>] truncate_inode_pages+0x10/0x20
[<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
[<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
[<ffffffff8112bb88>] iput+0x78/0x80
[<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
[<ffffffff811285f4>] dentry_iput+0x84/0x110
[<ffffffff811286ae>] d_kill+0x2e/0x60
[<ffffffff8112912a>] dput+0x7a/0x170
[<ffffffff8111e925>] path_put+0x15/0x40
[<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
[<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
[<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
[<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
[<ffffffff81234b7e>] kref_put+0x8e/0xe0
[<ffffffff811cb594>] nfs_release_request+0x14/0x20
[<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
[<ffffffff811d1180>] nfs_wb_page+0x80/0x110
[<ffffffff811c0770>] nfs_release_page+0x70/0x90
[<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
[<ffffffff810e1178>] shrink_page_list+0x638/0x860
[<ffffffff810e19de>] shrink_zone+0x63e/0xc40
[<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
[<ffffffff8107641e>] ? up_read+0x1e/0x40
[<ffffffff810e26a9>] kswapd+0x6c9/0xa20
[<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
[<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
[<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
[<ffffffff810706d6>] kthread+0x96/0xb0
[<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
[<ffffffff81464f14>] ? restore_args+0x0/0x30
[<ffffffff81070640>] ? kthread+0x0/0xb0
[<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
no locks held by kswapd0/305.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ae8d022..ffa5463 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
{
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
- if (gfp & __GFP_WAIT)
+ if (gfp & __GFP_WAIT) {
+ struct inode *inode;
+
+ inode = igrab(page->mapping->host);
nfs_wb_page(page->mapping->host, page);
+ iput(inode);
+ }
/* If PagePrivate() is set, then the page is not freeable */
if (PagePrivate(page))
return 0;
J. R. Okajima
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-10 18:51 ` J. R. Okajima
0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-10 18:51 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
Trond Myklebust:
> -static int nfs_wb_page_priority(struct inode *inode, struct page *page,
> - int how)
> +/*
> + * Write back all requests on one page - we do this before reading it.
> + */
> +int nfs_wb_page(struct inode *inode, struct page *page)
> {
:::
> - do {
> + while(PagePrivate(page)) {
> if (clear_page_dirty_for_io(page)) {
> ret = nfs_writepage_locked(page, &wbc);
> if (ret < 0)
> goto out_error;
> - } else if (!PagePrivate(page))
> + }
> + req = nfs_find_and_lock_request(page);
> + if (!req)
> break;
> - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
> - if (ret < 0)
> + if (IS_ERR(req)) {
> + ret = PTR_ERR(req);
> goto out_error;
:::
Hello Trond,
I am unsure whether this nfs_find_and_lock_request() call is correct or
not, but it brings me a problem.
This call trace in "kswapd blocked for more than brabra" message shows
that generic_delete_inode() blocked.
Is this put_nfs_open_context() -- iput() call in shrink_page_list()
context intended?
If not, I'd suggest a patch like this.
INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0 D 0000000000000001 0 305 2 0x00000000
ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
[<ffffffff8146155d>] io_schedule+0x4d/0x70
[<ffffffff810d2be5>] sync_page+0x65/0xa0
[<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
[<ffffffff810d2b80>] ? sync_page+0x0/0xa0
[<ffffffff810d2b64>] __lock_page+0x64/0x70
[<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
[<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
[<ffffffff810df340>] truncate_inode_pages+0x10/0x20
[<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
[<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
[<ffffffff8112bb88>] iput+0x78/0x80
[<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
[<ffffffff811285f4>] dentry_iput+0x84/0x110
[<ffffffff811286ae>] d_kill+0x2e/0x60
[<ffffffff8112912a>] dput+0x7a/0x170
[<ffffffff8111e925>] path_put+0x15/0x40
[<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
[<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
[<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
[<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
[<ffffffff81234b7e>] kref_put+0x8e/0xe0
[<ffffffff811cb594>] nfs_release_request+0x14/0x20
[<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
[<ffffffff811d1180>] nfs_wb_page+0x80/0x110
[<ffffffff811c0770>] nfs_release_page+0x70/0x90
[<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
[<ffffffff810e1178>] shrink_page_list+0x638/0x860
[<ffffffff810e19de>] shrink_zone+0x63e/0xc40
[<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
[<ffffffff8107641e>] ? up_read+0x1e/0x40
[<ffffffff810e26a9>] kswapd+0x6c9/0xa20
[<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
[<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
[<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
[<ffffffff810706d6>] kthread+0x96/0xb0
[<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
[<ffffffff81464f14>] ? restore_args+0x0/0x30
[<ffffffff81070640>] ? kthread+0x0/0xb0
[<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
no locks held by kswapd0/305.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ae8d022..ffa5463 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
{
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
- if (gfp & __GFP_WAIT)
+ if (gfp & __GFP_WAIT) {
+ struct inode *inode;
+
+ inode = igrab(page->mapping->host);
nfs_wb_page(page->mapping->host, page);
+ iput(inode);
+ }
/* If PagePrivate() is set, then the page is not freeable */
if (PagePrivate(page))
return 0;
J. R. Okajima
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
2010-03-10 18:51 ` J. R. Okajima
@ 2010-03-10 19:31 ` Trond Myklebust
-1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 19:31 UTC (permalink / raw)
To: J. R. Okajima
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
On Thu, 2010-03-11 at 03:51 +0900, J. R. Okajima wrote:
>
> INFO: task kswapd0:305 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> kswapd0 D 0000000000000001 0 305 2 0x00000000
> ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
> ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
> ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
> Call Trace:
> [<ffffffff8146155d>] io_schedule+0x4d/0x70
> [<ffffffff810d2be5>] sync_page+0x65/0xa0
> [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
> [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
> [<ffffffff810d2b64>] __lock_page+0x64/0x70
> [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
> [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
> [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
> [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
> [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
> [<ffffffff8112bb88>] iput+0x78/0x80
> [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
> [<ffffffff811285f4>] dentry_iput+0x84/0x110
> [<ffffffff811286ae>] d_kill+0x2e/0x60
> [<ffffffff8112912a>] dput+0x7a/0x170
> [<ffffffff8111e925>] path_put+0x15/0x40
> [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
> [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
> [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
> [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
> [<ffffffff81234b7e>] kref_put+0x8e/0xe0
> [<ffffffff811cb594>] nfs_release_request+0x14/0x20
> [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
> [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
> [<ffffffff811c0770>] nfs_release_page+0x70/0x90
> [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
> [<ffffffff810e1178>] shrink_page_list+0x638/0x860
> [<ffffffff810e19de>] shrink_zone+0x63e/0xc40
> [<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
> [<ffffffff8107641e>] ? up_read+0x1e/0x40
> [<ffffffff810e26a9>] kswapd+0x6c9/0xa20
> [<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
> [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
> [<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
> [<ffffffff810706d6>] kthread+0x96/0xb0
> [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
> [<ffffffff81464f14>] ? restore_args+0x0/0x30
> [<ffffffff81070640>] ? kthread+0x0/0xb0
> [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
> no locks held by kswapd0/305.
>
>
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index ae8d022..ffa5463 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
> {
> dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
>
> - if (gfp & __GFP_WAIT)
> + if (gfp & __GFP_WAIT) {
> + struct inode *inode;
> +
> + inode = igrab(page->mapping->host);
> nfs_wb_page(page->mapping->host, page);
> + iput(inode);
> + }
> /* If PagePrivate() is set, then the page is not freeable */
> if (PagePrivate(page))
> return 0;
>
>
> J. R. Okajima
>From your trace it looks as if the problem is that the nfs_wb_page() is
triggering a dentry release, which deadlocks with in
truncate_inode_pages() because the _caller_ of nfs_release_page() holds
a page lock.
As far as I can see, your iput() call above can deadlock in exactly the
same way.
Note that shrink_page_list() is the only function that does this sort of
thing without holding a reference to the inode.
Cheers
Trond
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-10 19:31 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 19:31 UTC (permalink / raw)
To: J. R. Okajima
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
On Thu, 2010-03-11 at 03:51 +0900, J. R. Okajima wrote:
>
> INFO: task kswapd0:305 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> kswapd0 D 0000000000000001 0 305 2 0x00000000
> ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
> ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
> ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
> Call Trace:
> [<ffffffff8146155d>] io_schedule+0x4d/0x70
> [<ffffffff810d2be5>] sync_page+0x65/0xa0
> [<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
> [<ffffffff810d2b80>] ? sync_page+0x0/0xa0
> [<ffffffff810d2b64>] __lock_page+0x64/0x70
> [<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
> [<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
> [<ffffffff810df340>] truncate_inode_pages+0x10/0x20
> [<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
> [<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
> [<ffffffff8112bb88>] iput+0x78/0x80
> [<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
> [<ffffffff811285f4>] dentry_iput+0x84/0x110
> [<ffffffff811286ae>] d_kill+0x2e/0x60
> [<ffffffff8112912a>] dput+0x7a/0x170
> [<ffffffff8111e925>] path_put+0x15/0x40
> [<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
> [<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
> [<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
> [<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
> [<ffffffff81234b7e>] kref_put+0x8e/0xe0
> [<ffffffff811cb594>] nfs_release_request+0x14/0x20
> [<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
> [<ffffffff811d1180>] nfs_wb_page+0x80/0x110
> [<ffffffff811c0770>] nfs_release_page+0x70/0x90
> [<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
> [<ffffffff810e1178>] shrink_page_list+0x638/0x860
> [<ffffffff810e19de>] shrink_zone+0x63e/0xc40
> [<ffffffff81464437>] ? _raw_spin_unlock+0x57/0x70
> [<ffffffff8107641e>] ? up_read+0x1e/0x40
> [<ffffffff810e26a9>] kswapd+0x6c9/0xa20
> [<ffffffff810df700>] ? isolate_pages_global+0x0/0x280
> [<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
> [<ffffffff810e1fe0>] ? kswapd+0x0/0xa20
> [<ffffffff810706d6>] kthread+0x96/0xb0
> [<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
> [<ffffffff81464f14>] ? restore_args+0x0/0x30
> [<ffffffff81070640>] ? kthread+0x0/0xb0
> [<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
> no locks held by kswapd0/305.
>
>
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index ae8d022..ffa5463 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -491,8 +491,13 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
> {
> dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
>
> - if (gfp & __GFP_WAIT)
> + if (gfp & __GFP_WAIT) {
> + struct inode *inode;
> +
> + inode = igrab(page->mapping->host);
> nfs_wb_page(page->mapping->host, page);
> + iput(inode);
> + }
> /* If PagePrivate() is set, then the page is not freeable */
> if (PagePrivate(page))
> return 0;
>
>
> J. R. Okajima
>From your trace it looks as if the problem is that the nfs_wb_page() is
triggering a dentry release, which deadlocks with in
truncate_inode_pages() because the _caller_ of nfs_release_page() holds
a page lock.
As far as I can see, your iput() call above can deadlock in exactly the
same way.
Note that shrink_page_list() is the only function that does this sort of
thing without holding a reference to the inode.
Cheers
Trond
^ permalink raw reply [flat|nested] 48+ messages in thread
[parent not found: <1268249482.3096.76.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
[not found] ` <1268249482.3096.76.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-10 20:18 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 20:18 UTC (permalink / raw)
To: J. R. Okajima
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
On Wed, 2010-03-10 at 14:31 -0500, Trond Myklebust wrote:
> >From your trace it looks as if the problem is that the nfs_wb_page() is
> triggering a dentry release, which deadlocks with in
> truncate_inode_pages() because the _caller_ of nfs_release_page() holds
> a page lock.
>
> As far as I can see, your iput() call above can deadlock in exactly the
> same way.
>
> Note that shrink_page_list() is the only function that does this sort of
> thing without holding a reference to the inode.
OK. Does the following patch fix the deadlock for you?
Cheers
Trond
-----------------------------------------------------------------------------------------------------------
NFS: Avoid a deadlock in nfs_release_page
From: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
J.R. Okajima reports the following deadlock:
INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0 D 0000000000000001 0 305 2 0x00000000
ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
[<ffffffff8146155d>] io_schedule+0x4d/0x70
[<ffffffff810d2be5>] sync_page+0x65/0xa0
[<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
[<ffffffff810d2b80>] ? sync_page+0x0/0xa0
[<ffffffff810d2b64>] __lock_page+0x64/0x70
[<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
[<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
[<ffffffff810df340>] truncate_inode_pages+0x10/0x20
[<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
[<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
[<ffffffff8112bb88>] iput+0x78/0x80
[<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
[<ffffffff811285f4>] dentry_iput+0x84/0x110
[<ffffffff811286ae>] d_kill+0x2e/0x60
[<ffffffff8112912a>] dput+0x7a/0x170
[<ffffffff8111e925>] path_put+0x15/0x40
[<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
[<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
[<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
[<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
[<ffffffff81234b7e>] kref_put+0x8e/0xe0
[<ffffffff811cb594>] nfs_release_request+0x14/0x20
[<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
[<ffffffff811d1180>] nfs_wb_page+0x80/0x110
[<ffffffff811c0770>] nfs_release_page+0x70/0x90
[<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
[<ffffffff810e1178>] shrink_page_list+0x638/0x860
[<ffffffff810e19de>] shrink_zone+0x63e/0xc40
We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/pagelist.c | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..81fb4a5 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -148,10 +148,16 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
void nfs_clear_request(struct nfs_page *req)
{
struct page *page = req->wb_page;
+ struct nfs_open_context *ctx = req->wb_context;
+
if (page != NULL) {
page_cache_release(page);
req->wb_page = NULL;
}
+ if (ctx != NULL) {
+ put_nfs_open_context(ctx);
+ req->wb_context = NULL;
+ }
}
@@ -165,9 +171,8 @@ static void nfs_free_request(struct kref *kref)
{
struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
- /* Release struct file or cached credential */
+ /* Release struct file and open context */
nfs_clear_request(req);
- put_nfs_open_context(req->wb_context);
nfs_page_free(req);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-10 20:18 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-10 20:18 UTC (permalink / raw)
To: J. R. Okajima
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
On Wed, 2010-03-10 at 14:31 -0500, Trond Myklebust wrote:
> >From your trace it looks as if the problem is that the nfs_wb_page() is
> triggering a dentry release, which deadlocks with in
> truncate_inode_pages() because the _caller_ of nfs_release_page() holds
> a page lock.
>
> As far as I can see, your iput() call above can deadlock in exactly the
> same way.
>
> Note that shrink_page_list() is the only function that does this sort of
> thing without holding a reference to the inode.
OK. Does the following patch fix the deadlock for you?
Cheers
Trond
-----------------------------------------------------------------------------------------------------------
NFS: Avoid a deadlock in nfs_release_page
From: Trond Myklebust <Trond.Myklebust@netapp.com>
J.R. Okajima reports the following deadlock:
INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0 D 0000000000000001 0 305 2 0x00000000
ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
[<ffffffff8146155d>] io_schedule+0x4d/0x70
[<ffffffff810d2be5>] sync_page+0x65/0xa0
[<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
[<ffffffff810d2b80>] ? sync_page+0x0/0xa0
[<ffffffff810d2b64>] __lock_page+0x64/0x70
[<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
[<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
[<ffffffff810df340>] truncate_inode_pages+0x10/0x20
[<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
[<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
[<ffffffff8112bb88>] iput+0x78/0x80
[<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
[<ffffffff811285f4>] dentry_iput+0x84/0x110
[<ffffffff811286ae>] d_kill+0x2e/0x60
[<ffffffff8112912a>] dput+0x7a/0x170
[<ffffffff8111e925>] path_put+0x15/0x40
[<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
[<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
[<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
[<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
[<ffffffff81234b7e>] kref_put+0x8e/0xe0
[<ffffffff811cb594>] nfs_release_request+0x14/0x20
[<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
[<ffffffff811d1180>] nfs_wb_page+0x80/0x110
[<ffffffff811c0770>] nfs_release_page+0x70/0x90
[<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
[<ffffffff810e1178>] shrink_page_list+0x638/0x860
[<ffffffff810e19de>] shrink_zone+0x63e/0xc40
We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/pagelist.c | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..81fb4a5 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -148,10 +148,16 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
void nfs_clear_request(struct nfs_page *req)
{
struct page *page = req->wb_page;
+ struct nfs_open_context *ctx = req->wb_context;
+
if (page != NULL) {
page_cache_release(page);
req->wb_page = NULL;
}
+ if (ctx != NULL) {
+ put_nfs_open_context(ctx);
+ req->wb_context = NULL;
+ }
}
@@ -165,9 +171,8 @@ static void nfs_free_request(struct kref *kref)
{
struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
- /* Release struct file or cached credential */
+ /* Release struct file and open context */
nfs_clear_request(req);
- put_nfs_open_context(req->wb_context);
nfs_page_free(req);
}
^ permalink raw reply related [flat|nested] 48+ messages in thread
[parent not found: <1268252300.3096.81.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
[not found] ` <1268252300.3096.81.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-11 4:45 ` J. R. Okajima
0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-11 4:45 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Trond Myklebust:
> OK. Does the following patch fix the deadlock for you?
Thanx for the patch, but it didn't work.
BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
PGD 1e641067 PUD 135a3067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
last sysfs file: /sys/fs/aufs/si_fa7907d8bbb32280/br1
CPU 0
Modules linked in: aufs configs nfsd exportfs nls_iso8859_1 nls_cp437 [last unloaded: aufs]
Pid: 308, comm: nfsiod Not tainted 2.6.33aufsD #730 IPM41/Pegatron
RIP: 0010:[<ffffffff811cb860>] [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
RSP: 0018:ffff88001f26bd10 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff86b47850
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88001b87e7c0
RBP: ffff88001f26bd30 R08: fdb3fbf8f8168244 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffff88001b87e7c0
R13: ffff88001f277110 R14: 0000000000000000 R15: ffff88001f2772f8
FS: 0000000000000000(0000) GS:ffff88000bc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000010 CR3: 000000001de5a000 CR4: 00000000000406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process nfsiod (pid: 308, threadinfo ffff88001f26a000, task ffff88001fe92040)
Stack:
ffff88001b87e7c0 0000000000000000 ffff88001b87e7c0 ffff88001f277110
<0> ffff88001f26bd70 ffffffff811d1464 ffff88001f2772ec 0000000000000000
<0> ffff88001f277118 ffff88001f277110 ffffffff8162b980 ffffffff81435e20
Call Trace:
[<ffffffff811d1464>] nfs_commit_release+0x74/0x210
[<ffffffff81435e20>] ? rpc_async_release+0x0/0x20
[<ffffffff81435c82>] rpc_release_calldata+0x12/0x20
[<ffffffff81435cea>] rpc_free_task+0x3a/0xa0
[<ffffffff81435e30>] rpc_async_release+0x10/0x20
[<ffffffff8106bd84>] worker_thread+0x2d4/0x420
[<ffffffff8106bd31>] ? worker_thread+0x281/0x420
[<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
[<ffffffff8106bab0>] ? worker_thread+0x0/0x420
[<ffffffff810706d6>] kthread+0x96/0xb0
[<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
[<ffffffff81464f14>] ? restore_args+0x0/0x30
[<ffffffff81070640>] ? kthread+0x0/0xb0
[<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
Code: 29 00 0f 0b eb fe 0f 1f 44 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 48 89 5d e8 4c 89 6d f8 49 89 fc 48 8b 47 18 48 83 7f 10 00 <48> 8b 40 10 4c 8b 68 40 74 46 49 8d 9d b0 00 00 00 48 89 df e8
RIP [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
RSP <ffff88001f26bd10>
CR2: 0000000000000010
---[ end trace 1265485acdc49b9d ]---
J. R. Okajima
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-11 4:45 ` J. R. Okajima
0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-11 4:45 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
Trond Myklebust:
> OK. Does the following patch fix the deadlock for you?
Thanx for the patch, but it didn't work.
BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
PGD 1e641067 PUD 135a3067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
last sysfs file: /sys/fs/aufs/si_fa7907d8bbb32280/br1
CPU 0
Modules linked in: aufs configs nfsd exportfs nls_iso8859_1 nls_cp437 [last unloaded: aufs]
Pid: 308, comm: nfsiod Not tainted 2.6.33aufsD #730 IPM41/Pegatron
RIP: 0010:[<ffffffff811cb860>] [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
RSP: 0018:ffff88001f26bd10 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff86b47850
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88001b87e7c0
RBP: ffff88001f26bd30 R08: fdb3fbf8f8168244 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffff88001b87e7c0
R13: ffff88001f277110 R14: 0000000000000000 R15: ffff88001f2772f8
FS: 0000000000000000(0000) GS:ffff88000bc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000010 CR3: 000000001de5a000 CR4: 00000000000406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process nfsiod (pid: 308, threadinfo ffff88001f26a000, task ffff88001fe92040)
Stack:
ffff88001b87e7c0 0000000000000000 ffff88001b87e7c0 ffff88001f277110
<0> ffff88001f26bd70 ffffffff811d1464 ffff88001f2772ec 0000000000000000
<0> ffff88001f277118 ffff88001f277110 ffffffff8162b980 ffffffff81435e20
Call Trace:
[<ffffffff811d1464>] nfs_commit_release+0x74/0x210
[<ffffffff81435e20>] ? rpc_async_release+0x0/0x20
[<ffffffff81435c82>] rpc_release_calldata+0x12/0x20
[<ffffffff81435cea>] rpc_free_task+0x3a/0xa0
[<ffffffff81435e30>] rpc_async_release+0x10/0x20
[<ffffffff8106bd84>] worker_thread+0x2d4/0x420
[<ffffffff8106bd31>] ? worker_thread+0x281/0x420
[<ffffffff81070ca0>] ? autoremove_wake_function+0x0/0x40
[<ffffffff8106bab0>] ? worker_thread+0x0/0x420
[<ffffffff810706d6>] kthread+0x96/0xb0
[<ffffffff8100b5a4>] kernel_thread_helper+0x4/0x10
[<ffffffff81464f14>] ? restore_args+0x0/0x30
[<ffffffff81070640>] ? kthread+0x0/0xb0
[<ffffffff8100b5a0>] ? kernel_thread_helper+0x0/0x10
Code: 29 00 0f 0b eb fe 0f 1f 44 00 00 55 48 89 e5 48 83 ec 20 4c 89 65 f0 48 89 5d e8 4c 89 6d f8 49 89 fc 48 8b 47 18 48 83 7f 10 00 <48> 8b 40 10 4c 8b 68 40 74 46 49 8d 9d b0 00 00 00 48 89 df e8
RIP [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
RSP <ffff88001f26bd10>
CR2: 0000000000000010
---[ end trace 1265485acdc49b9d ]---
J. R. Okajima
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
2010-03-11 4:45 ` J. R. Okajima
@ 2010-03-11 14:26 ` Trond Myklebust
-1 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-11 14:26 UTC (permalink / raw)
To: J. R. Okajima
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
On Thu, 2010-03-11 at 13:45 +0900, J. R. Okajima wrote:
> Trond Myklebust:
> > OK. Does the following patch fix the deadlock for you?
>
> Thanx for the patch, but it didn't work.
>
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
> IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
Oops... Yes, that needs to be fixed, and we should probably make
nfs_set_page_tag_locked() safe too.
How about the following? I can't seem to find any further dereferences
of req->wb_context after the nfs_clear_request().
Cheers
Trond
-------------------------------------------------------------------------------------------------------
NFS: Avoid a deadlock in nfs_release_page
From: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
J.R. Okajima reports the following deadlock:
INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0 D 0000000000000001 0 305 2 0x00000000
ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
[<ffffffff8146155d>] io_schedule+0x4d/0x70
[<ffffffff810d2be5>] sync_page+0x65/0xa0
[<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
[<ffffffff810d2b80>] ? sync_page+0x0/0xa0
[<ffffffff810d2b64>] __lock_page+0x64/0x70
[<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
[<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
[<ffffffff810df340>] truncate_inode_pages+0x10/0x20
[<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
[<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
[<ffffffff8112bb88>] iput+0x78/0x80
[<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
[<ffffffff811285f4>] dentry_iput+0x84/0x110
[<ffffffff811286ae>] d_kill+0x2e/0x60
[<ffffffff8112912a>] dput+0x7a/0x170
[<ffffffff8111e925>] path_put+0x15/0x40
[<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
[<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
[<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
[<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
[<ffffffff81234b7e>] kref_put+0x8e/0xe0
[<ffffffff811cb594>] nfs_release_request+0x14/0x20
[<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
[<ffffffff811d1180>] nfs_wb_page+0x80/0x110
[<ffffffff811c0770>] nfs_release_page+0x70/0x90
[<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
[<ffffffff810e1178>] shrink_page_list+0x638/0x860
[<ffffffff810e19de>] shrink_zone+0x63e/0xc40
We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
Cc: stable-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org
---
fs/nfs/pagelist.c | 23 +++++++++++++----------
1 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..29d9d36 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
*/
int nfs_set_page_tag_locked(struct nfs_page *req)
{
- struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
-
if (!nfs_lock_request_dontget(req))
return 0;
if (req->wb_page != NULL)
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 1;
}
@@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
*/
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
- struct nfs_inode *nfsi = NFS_I(inode);
-
if (req->wb_page != NULL) {
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
spin_lock(&inode->i_lock);
radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
nfs_unlock_request(req);
@@ -142,16 +140,22 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
* nfs_clear_request - Free up all resources allocated to the request
* @req:
*
- * Release page resources associated with a write request after it
- * has completed.
+ * Release page and open context resources associated with a read/write
+ * request after it has completed.
*/
void nfs_clear_request(struct nfs_page *req)
{
struct page *page = req->wb_page;
+ struct nfs_open_context *ctx = req->wb_context;
+
if (page != NULL) {
page_cache_release(page);
req->wb_page = NULL;
}
+ if (ctx != NULL) {
+ put_nfs_open_context(ctx);
+ req->wb_context = NULL;
+ }
}
@@ -165,9 +169,8 @@ static void nfs_free_request(struct kref *kref)
{
struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
- /* Release struct file or cached credential */
+ /* Release struct file and open context */
nfs_clear_request(req);
- put_nfs_open_context(req->wb_context);
nfs_page_free(req);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-11 14:26 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-03-11 14:26 UTC (permalink / raw)
To: J. R. Okajima
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
On Thu, 2010-03-11 at 13:45 +0900, J. R. Okajima wrote:
> Trond Myklebust:
> > OK. Does the following patch fix the deadlock for you?
>
> Thanx for the patch, but it didn't work.
>
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
> IP: [<ffffffff811cb860>] nfs_clear_page_tag_locked+0x20/0x80
Oops... Yes, that needs to be fixed, and we should probably make
nfs_set_page_tag_locked() safe too.
How about the following? I can't seem to find any further dereferences
of req->wb_context after the nfs_clear_request().
Cheers
Trond
-------------------------------------------------------------------------------------------------------
NFS: Avoid a deadlock in nfs_release_page
From: Trond Myklebust <Trond.Myklebust@netapp.com>
J.R. Okajima reports the following deadlock:
INFO: task kswapd0:305 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kswapd0 D 0000000000000001 0 305 2 0x00000000
ffff88001f21d4f0 0000000000000046 ffff88001fdea680 ffff88001f21c000
ffff88001f21dfd8 ffff88001f21c000 ffff88001f21dfd8 ffff88001f21dfd8
ffff88001fdea040 0000000000014c00 0000000000000001 ffff88001fdea040
Call Trace:
[<ffffffff8146155d>] io_schedule+0x4d/0x70
[<ffffffff810d2be5>] sync_page+0x65/0xa0
[<ffffffff81461b12>] __wait_on_bit_lock+0x52/0xb0
[<ffffffff810d2b80>] ? sync_page+0x0/0xa0
[<ffffffff810d2b64>] __lock_page+0x64/0x70
[<ffffffff81070ce0>] ? wake_bit_function+0x0/0x40
[<ffffffff810df1d4>] truncate_inode_pages_range+0x344/0x4a0
[<ffffffff810df340>] truncate_inode_pages+0x10/0x20
[<ffffffff8112cbfe>] generic_delete_inode+0x15e/0x190
[<ffffffff8112cc8d>] generic_drop_inode+0x5d/0x80
[<ffffffff8112bb88>] iput+0x78/0x80
[<ffffffff811bc908>] nfs_dentry_iput+0x38/0x50
[<ffffffff811285f4>] dentry_iput+0x84/0x110
[<ffffffff811286ae>] d_kill+0x2e/0x60
[<ffffffff8112912a>] dput+0x7a/0x170
[<ffffffff8111e925>] path_put+0x15/0x40
[<ffffffff811c3a44>] __put_nfs_open_context+0xa4/0xb0
[<ffffffff811cb5d0>] ? nfs_free_request+0x0/0x50
[<ffffffff811c3b0b>] put_nfs_open_context+0xb/0x10
[<ffffffff811cb5f9>] nfs_free_request+0x29/0x50
[<ffffffff81234b7e>] kref_put+0x8e/0xe0
[<ffffffff811cb594>] nfs_release_request+0x14/0x20
[<ffffffff811cf769>] nfs_find_and_lock_request+0x89/0xa0
[<ffffffff811d1180>] nfs_wb_page+0x80/0x110
[<ffffffff811c0770>] nfs_release_page+0x70/0x90
[<ffffffff810d18ee>] try_to_release_page+0x5e/0x80
[<ffffffff810e1178>] shrink_page_list+0x638/0x860
[<ffffffff810e19de>] shrink_zone+0x63e/0xc40
We can fix this by making the call to put_nfs_open_context() happen when we
actually remove the write request from the inode (which is done by the
nfsiod thread in this case).
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
fs/nfs/pagelist.c | 23 +++++++++++++----------
1 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a12c45b..29d9d36 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
*/
int nfs_set_page_tag_locked(struct nfs_page *req)
{
- struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
-
if (!nfs_lock_request_dontget(req))
return 0;
if (req->wb_page != NULL)
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 1;
}
@@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
*/
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
- struct nfs_inode *nfsi = NFS_I(inode);
-
if (req->wb_page != NULL) {
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
spin_lock(&inode->i_lock);
radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
nfs_unlock_request(req);
@@ -142,16 +140,22 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
* nfs_clear_request - Free up all resources allocated to the request
* @req:
*
- * Release page resources associated with a write request after it
- * has completed.
+ * Release page and open context resources associated with a read/write
+ * request after it has completed.
*/
void nfs_clear_request(struct nfs_page *req)
{
struct page *page = req->wb_page;
+ struct nfs_open_context *ctx = req->wb_context;
+
if (page != NULL) {
page_cache_release(page);
req->wb_page = NULL;
}
+ if (ctx != NULL) {
+ put_nfs_open_context(ctx);
+ req->wb_context = NULL;
+ }
}
@@ -165,9 +169,8 @@ static void nfs_free_request(struct kref *kref)
{
struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
- /* Release struct file or cached credential */
+ /* Release struct file and open context */
nfs_clear_request(req);
- put_nfs_open_context(req->wb_context);
nfs_page_free(req);
}
^ permalink raw reply related [flat|nested] 48+ messages in thread
[parent not found: <1268317582.3354.9.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
[not found] ` <1268317582.3354.9.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-12 4:22 ` J. R. Okajima
2010-03-17 16:49 ` Christoph Hellwig
1 sibling, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-12 4:22 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang, Peter Zijlstra,
Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Trond Myklebust:
> Oops... Yes, that needs to be fixed, and we should probably make
> nfs_set_page_tag_locked() safe too.
>
> How about the following? I can't seem to find any further dereferences
> of req->wb_context after the nfs_clear_request().
It passed all of my local tests successfully.
Thank you.
J. R. Okajima
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-12 4:22 ` J. R. Okajima
0 siblings, 0 replies; 48+ messages in thread
From: J. R. Okajima @ 2010-03-12 4:22 UTC (permalink / raw)
To: Trond Myklebust
Cc: linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago,
Jens Axboe, Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel, Christoph Hellwig, Al Viro
Trond Myklebust:
> Oops... Yes, that needs to be fixed, and we should probably make
> nfs_set_page_tag_locked() safe too.
>
> How about the following? I can't seem to find any further dereferences
> of req->wb_context after the nfs_clear_request().
It passed all of my local tests successfully.
Thank you.
J. R. Okajima
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
[not found] ` <1268317582.3354.9.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-03-17 16:49 ` Christoph Hellwig
2010-03-17 16:49 ` Christoph Hellwig
1 sibling, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-03-17 16:49 UTC (permalink / raw)
To: Trond Myklebust
Cc: J. R. Okajima, linux-nfs-u79uwXL29TY76Z2rM5mHXA, Wu Fengguang,
Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe, Peter Staubach,
Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> */
> int nfs_set_page_tag_locked(struct nfs_page *req)
> {
> - struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> -
> if (!nfs_lock_request_dontget(req))
> return 0;
> if (req->wb_page != NULL)
> - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> + radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> return 1;
> }
>
This hunk is not only entirely unrealted to the fix, but also osbfucates
the code.
> @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> */
> void nfs_clear_page_tag_locked(struct nfs_page *req)
> {
> - struct inode *inode = req->wb_context->path.dentry->d_inode;
> - struct nfs_inode *nfsi = NFS_I(inode);
> -
> if (req->wb_page != NULL) {
> + struct inode *inode = req->wb_context->path.dentry->d_inode;
> + struct nfs_inode *nfsi = NFS_I(inode);
> +
> spin_lock(&inode->i_lock);
> radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> nfs_unlock_request(req);
Another one unrelated to the fix.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
@ 2010-03-17 16:49 ` Christoph Hellwig
0 siblings, 0 replies; 48+ messages in thread
From: Christoph Hellwig @ 2010-03-17 16:49 UTC (permalink / raw)
To: Trond Myklebust
Cc: J. R. Okajima, linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara,
Steve Rago, Jens Axboe, Peter Staubach, Arjan van de Ven,
Ingo Molnar, linux-fsdevel, Christoph Hellwig, Al Viro
On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> */
> int nfs_set_page_tag_locked(struct nfs_page *req)
> {
> - struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> -
> if (!nfs_lock_request_dontget(req))
> return 0;
> if (req->wb_page != NULL)
> - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> + radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> return 1;
> }
>
This hunk is not only entirely unrealted to the fix, but also osbfucates
the code.
> @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> */
> void nfs_clear_page_tag_locked(struct nfs_page *req)
> {
> - struct inode *inode = req->wb_context->path.dentry->d_inode;
> - struct nfs_inode *nfsi = NFS_I(inode);
> -
> if (req->wb_page != NULL) {
> + struct inode *inode = req->wb_context->path.dentry->d_inode;
> + struct nfs_inode *nfsi = NFS_I(inode);
> +
> spin_lock(&inode->i_lock);
> radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> nfs_unlock_request(req);
Another one unrelated to the fix.
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
2010-03-17 16:49 ` Christoph Hellwig
(?)
@ 2010-03-17 17:26 ` Trond Myklebust
2010-03-17 17:52 ` Jeff Layton
-1 siblings, 1 reply; 48+ messages in thread
From: Trond Myklebust @ 2010-03-17 17:26 UTC (permalink / raw)
To: Christoph Hellwig
Cc: J. R. Okajima, linux-nfs, Wu Fengguang, Peter Zijlstra, Jan Kara,
Steve Rago, Jens Axboe, Peter Staubach, Arjan van de Ven,
Ingo Molnar, linux-fsdevel, Christoph Hellwig, Al Viro
On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote:
> On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> > */
> > int nfs_set_page_tag_locked(struct nfs_page *req)
> > {
> > - struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > -
> > if (!nfs_lock_request_dontget(req))
> > return 0;
> > if (req->wb_page != NULL)
> > - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > + radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > return 1;
> > }
> >
>
> This hunk is not only entirely unrealted to the fix, but also osbfucates
> the code.
>
> > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> > */
> > void nfs_clear_page_tag_locked(struct nfs_page *req)
> > {
> > - struct inode *inode = req->wb_context->path.dentry->d_inode;
> > - struct nfs_inode *nfsi = NFS_I(inode);
> > -
> > if (req->wb_page != NULL) {
> > + struct inode *inode = req->wb_context->path.dentry->d_inode;
> > + struct nfs_inode *nfsi = NFS_I(inode);
> > +
> > spin_lock(&inode->i_lock);
> > radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > nfs_unlock_request(req);
>
> Another one unrelated to the fix.
>
No. They are needed because we don't want to dereference req->wb_context
after it (and req->wb_page) have been cleared. Without these 2 hunks,
the resulting kernel Oopses.
Trond
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
2010-03-17 17:26 ` Trond Myklebust
@ 2010-03-17 17:52 ` Jeff Layton
2010-03-17 17:58 ` Trond Myklebust
0 siblings, 1 reply; 48+ messages in thread
From: Jeff Layton @ 2010-03-17 17:52 UTC (permalink / raw)
To: Trond Myklebust
Cc: Christoph Hellwig, J. R. Okajima, linux-nfs, Wu Fengguang,
Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Arjan van de Ven, Ingo Molnar, linux-fsdevel, Christoph Hellwig,
Al Viro
On Wed, 17 Mar 2010 13:26:59 -0400
Trond Myklebust <Trond.Myklebust@netapp.com> wrote:
> On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote:
> > On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> > > */
> > > int nfs_set_page_tag_locked(struct nfs_page *req)
> > > {
> > > - struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > > -
> > > if (!nfs_lock_request_dontget(req))
> > > return 0;
> > > if (req->wb_page != NULL)
> > > - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > + radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > return 1;
> > > }
> > >
> >
> > This hunk is not only entirely unrealted to the fix, but also osbfucates
> > the code.
> >
> > > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> > > */
> > > void nfs_clear_page_tag_locked(struct nfs_page *req)
> > > {
> > > - struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > - struct nfs_inode *nfsi = NFS_I(inode);
> > > -
> > > if (req->wb_page != NULL) {
> > > + struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > + struct nfs_inode *nfsi = NFS_I(inode);
> > > +
> > > spin_lock(&inode->i_lock);
> > > radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > nfs_unlock_request(req);
> >
> > Another one unrelated to the fix.
> >
>
> No. They are needed because we don't want to dereference req->wb_context
> after it (and req->wb_page) have been cleared. Without these 2 hunks,
> the resulting kernel Oopses.
>
It seems like that just tightens up the race window without actually
closing it.
Just because wb_page was non-NULL when you checked it gives no
guarantee that wb_context won't be NULL when you go to dereference it,
right?
--
Jeff Layton <jlayton@redhat.com>
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH 10/12] NFS: Simplify nfs_wb_page()
2010-03-17 17:52 ` Jeff Layton
@ 2010-03-17 17:58 ` Trond Myklebust
[not found] ` <1268848682.8335.5.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
0 siblings, 1 reply; 48+ messages in thread
From: Trond Myklebust @ 2010-03-17 17:58 UTC (permalink / raw)
To: Jeff Layton
Cc: Christoph Hellwig, J. R. Okajima, linux-nfs, Wu Fengguang,
Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Arjan van de Ven, Ingo Molnar, linux-fsdevel, Christoph Hellwig,
Al Viro
On Wed, 2010-03-17 at 13:52 -0400, Jeff Layton wrote:
> On Wed, 17 Mar 2010 13:26:59 -0400
> Trond Myklebust <Trond.Myklebust@netapp.com> wrote:
>
> > On Wed, 2010-03-17 at 12:49 -0400, Christoph Hellwig wrote:
> > > On Thu, Mar 11, 2010 at 09:26:22AM -0500, Trond Myklebust wrote:
> > > > @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req)
> > > > */
> > > > int nfs_set_page_tag_locked(struct nfs_page *req)
> > > > {
> > > > - struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
> > > > -
> > > > if (!nfs_lock_request_dontget(req))
> > > > return 0;
> > > > if (req->wb_page != NULL)
> > > > - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > + radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > return 1;
> > > > }
> > > >
> > >
> > > This hunk is not only entirely unrealted to the fix, but also osbfucates
> > > the code.
> > >
> > > > @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
> > > > */
> > > > void nfs_clear_page_tag_locked(struct nfs_page *req)
> > > > {
> > > > - struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > - struct nfs_inode *nfsi = NFS_I(inode);
> > > > -
> > > > if (req->wb_page != NULL) {
> > > > + struct inode *inode = req->wb_context->path.dentry->d_inode;
> > > > + struct nfs_inode *nfsi = NFS_I(inode);
> > > > +
> > > > spin_lock(&inode->i_lock);
> > > > radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
> > > > nfs_unlock_request(req);
> > >
> > > Another one unrelated to the fix.
> > >
> >
> > No. They are needed because we don't want to dereference req->wb_context
> > after it (and req->wb_page) have been cleared. Without these 2 hunks,
> > the resulting kernel Oopses.
> >
>
> It seems like that just tightens up the race window without actually
> closing it.
>
> Just because wb_page was non-NULL when you checked it gives no
> guarantee that wb_context won't be NULL when you go to dereference it,
> right?
>
The above 2 functions are the ones that lock and unlock the request.
Once locked by means of the call to nfs_lock_request_dontget(req), no
other threads can change the contents of wb_page and wb_context.
IOW: yes, there is definitely such a guarantee...
Trond
^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH 12/12] NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping
[not found] ` <20100125221544.16750.70574.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-01-25 22:15 ` Trond Myklebust
2010-01-25 22:15 ` Trond Myklebust
` (8 subsequent siblings)
9 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Christoph Hellwig, Al Viro
Signed-off-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
---
fs/nfs/dir.c | 2 +-
fs/nfs/inode.c | 41 +----------------------------------------
fs/nfs/symlink.c | 2 +-
include/linux/nfs_fs.h | 1 -
4 files changed, 3 insertions(+), 43 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3c7f03b..a1f6b44 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
desc->entry = &my_entry;
nfs_block_sillyrename(dentry);
- res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
+ res = nfs_revalidate_mapping(inode, filp->f_mapping);
if (res < 0)
goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a867c4c..30988bc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -759,7 +759,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
return __nfs_revalidate_inode(server, inode);
}
-static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
+static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -780,49 +780,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
return 0;
}
-static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
-{
- int ret = 0;
-
- mutex_lock(&inode->i_mutex);
- if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
- ret = nfs_sync_mapping(mapping);
- if (ret == 0)
- ret = nfs_invalidate_mapping_nolock(inode, mapping);
- }
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
-/**
- * nfs_revalidate_mapping_nolock - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- */
-int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- int ret = 0;
-
- if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
- || nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
- ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- if (ret < 0)
- goto out;
- }
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- ret = nfs_invalidate_mapping_nolock(inode, mapping);
-out:
- return ret;
-}
-
/**
* nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
* @mapping - pointer to mapping
- *
- * This version of the function will take the inode->i_mutex and attempt to
- * flush out all dirty data if it needs to invalidate the page cache.
*/
int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
{
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 412738d..2ea9e5c 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
struct page *page;
void *err;
- err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping));
+ err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
goto read_failed;
page = read_cache_page(&inode->i_data, 0,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6ee74c5..896a5f3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -346,7 +346,6 @@ extern int nfs_attribute_timeout(struct inode *inode);
extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
-extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
extern int nfs_setattr(struct dentry *, struct iattr *);
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH 12/12] NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping
@ 2010-01-25 22:15 ` Trond Myklebust
0 siblings, 0 replies; 48+ messages in thread
From: Trond Myklebust @ 2010-01-25 22:15 UTC (permalink / raw)
To: linux-nfs
Cc: Wu Fengguang, Peter Zijlstra, Jan Kara, Steve Rago, Jens Axboe,
Peter Staubach, Arjan van de Ven, Ingo Molnar, linux-fsdevel,
Christoph Hellwig, Al Viro
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/dir.c | 2 +-
fs/nfs/inode.c | 41 +----------------------------------------
fs/nfs/symlink.c | 2 +-
include/linux/nfs_fs.h | 1 -
4 files changed, 3 insertions(+), 43 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3c7f03b..a1f6b44 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
desc->entry = &my_entry;
nfs_block_sillyrename(dentry);
- res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
+ res = nfs_revalidate_mapping(inode, filp->f_mapping);
if (res < 0)
goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a867c4c..30988bc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -759,7 +759,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
return __nfs_revalidate_inode(server, inode);
}
-static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
+static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -780,49 +780,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
return 0;
}
-static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
-{
- int ret = 0;
-
- mutex_lock(&inode->i_mutex);
- if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
- ret = nfs_sync_mapping(mapping);
- if (ret == 0)
- ret = nfs_invalidate_mapping_nolock(inode, mapping);
- }
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
-/**
- * nfs_revalidate_mapping_nolock - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- */
-int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- int ret = 0;
-
- if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
- || nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
- ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- if (ret < 0)
- goto out;
- }
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- ret = nfs_invalidate_mapping_nolock(inode, mapping);
-out:
- return ret;
-}
-
/**
* nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
* @mapping - pointer to mapping
- *
- * This version of the function will take the inode->i_mutex and attempt to
- * flush out all dirty data if it needs to invalidate the page cache.
*/
int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
{
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 412738d..2ea9e5c 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
struct page *page;
void *err;
- err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping));
+ err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
goto read_failed;
page = read_cache_page(&inode->i_data, 0,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6ee74c5..896a5f3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -346,7 +346,6 @@ extern int nfs_attribute_timeout(struct inode *inode);
extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
-extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
extern int nfs_setattr(struct dentry *, struct iattr *);
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
^ permalink raw reply related [flat|nested] 48+ messages in thread