* [PATCH 1/2] nfs: writeback pages wait queue
@ 2011-10-20 15:55 ` Wu Fengguang
  0 siblings, 0 replies; 9+ messages in thread
From: Wu Fengguang @ 2011-10-20 15:55 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs
  Cc: Peter Zijlstra, linux-fsdevel, Andrew Morton, Jan Kara,
	Christoph Hellwig, Dave Chinner, Greg Thelen, Minchan Kim,
	Vivek Goyal, Andrea Righi, linux-mm, LKML

The generic writeback routines are moving away from congestion_wait()
in favor of get_request_wait(), i.e. waiting on the block request queues.

Introduce the missing writeback wait queue for NFS; otherwise its
writeback pages can grow out of control and exhaust all PG_dirty pages.

Feng: throttle at a coarser granularity, on each ->writepages call
rather than on each page, for better performance and to avoid a
throttled-before-send-RPC deadlock.

CC: Jens Axboe <axboe@kernel.dk>
CC: Chris Mason <chris.mason@oracle.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 fs/nfs/client.c           |    2 
 fs/nfs/write.c            |   84 +++++++++++++++++++++++++++++++-----
 include/linux/nfs_fs_sb.h |    1 
 3 files changed, 77 insertions(+), 10 deletions(-)

--- linux-next.orig/fs/nfs/write.c	2011-10-20 23:08:17.000000000 +0800
+++ linux-next/fs/nfs/write.c	2011-10-20 23:45:59.000000000 +0800
@@ -190,11 +190,64 @@ static int wb_priority(struct writeback_
  * NFS congestion control
  */
 
+#define NFS_WAIT_PAGES	(1024L >> (PAGE_SHIFT - 10))
 int nfs_congestion_kb;
 
-#define NFS_CONGESTION_ON_THRESH 	(nfs_congestion_kb >> (PAGE_SHIFT-10))
-#define NFS_CONGESTION_OFF_THRESH	\
-	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
+/*
+ * SYNC requests block at (2*limit) and wake up at (2*limit - NFS_WAIT_PAGES);
+ * ASYNC requests block at (limit) and wake up at (limit - NFS_WAIT_PAGES).
+ * This way SYNC writes are never blocked behind ASYNC ones.
+ */
+
+static void nfs_set_congested(long nr, struct backing_dev_info *bdi)
+{
+	long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+	if (nr > limit && !test_bit(BDI_async_congested, &bdi->state))
+		set_bdi_congested(bdi, BLK_RW_ASYNC);
+	else if (nr > 2 * limit && !test_bit(BDI_sync_congested, &bdi->state))
+		set_bdi_congested(bdi, BLK_RW_SYNC);
+}
+
+static void nfs_wait_congested(int is_sync,
+			       struct backing_dev_info *bdi,
+			       wait_queue_head_t *wqh)
+{
+	int waitbit = is_sync ? BDI_sync_congested : BDI_async_congested;
+	DEFINE_WAIT(wait);
+
+	if (!test_bit(waitbit, &bdi->state))
+		return;
+
+	for (;;) {
+		prepare_to_wait(&wqh[is_sync], &wait, TASK_UNINTERRUPTIBLE);
+		if (!test_bit(waitbit, &bdi->state))
+			break;
+
+		io_schedule();
+	}
+	finish_wait(&wqh[is_sync], &wait);
+}
+
+static void nfs_wakeup_congested(long nr,
+				 struct backing_dev_info *bdi,
+				 wait_queue_head_t *wqh)
+{
+	long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+	if (nr < 2 * limit - min(limit / 8, NFS_WAIT_PAGES)) {
+		if (test_bit(BDI_sync_congested, &bdi->state))
+			clear_bdi_congested(bdi, BLK_RW_SYNC);
+		if (waitqueue_active(&wqh[BLK_RW_SYNC]))
+			wake_up(&wqh[BLK_RW_SYNC]);
+	}
+	if (nr < limit - min(limit / 8, NFS_WAIT_PAGES)) {
+		if (test_bit(BDI_async_congested, &bdi->state))
+			clear_bdi_congested(bdi, BLK_RW_ASYNC);
+		if (waitqueue_active(&wqh[BLK_RW_ASYNC]))
+			wake_up(&wqh[BLK_RW_ASYNC]);
+	}
+}
 
 static int nfs_set_page_writeback(struct page *page)
 {
@@ -205,11 +258,8 @@ static int nfs_set_page_writeback(struct
 		struct nfs_server *nfss = NFS_SERVER(inode);
 
 		page_cache_get(page);
-		if (atomic_long_inc_return(&nfss->writeback) >
-				NFS_CONGESTION_ON_THRESH) {
-			set_bdi_congested(&nfss->backing_dev_info,
-						BLK_RW_ASYNC);
-		}
+		nfs_set_congested(atomic_long_inc_return(&nfss->writeback),
+				  &nfss->backing_dev_info);
 	}
 	return ret;
 }
@@ -221,8 +271,10 @@ static void nfs_end_page_writeback(struc
 
 	end_page_writeback(page);
 	page_cache_release(page);
-	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
-		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
+
+	nfs_wakeup_congested(atomic_long_dec_return(&nfss->writeback),
+			     &nfss->backing_dev_info,
+			     nfss->writeback_wait);
 }
 
 static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
@@ -323,10 +375,17 @@ static int nfs_writepage_locked(struct p
 
 int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
+	struct inode *inode = page->mapping->host;
+	struct nfs_server *nfss = NFS_SERVER(inode);
 	int ret;
 
 	ret = nfs_writepage_locked(page, wbc);
 	unlock_page(page);
+
+	nfs_wait_congested(wbc->sync_mode == WB_SYNC_ALL,
+			   &nfss->backing_dev_info,
+			   nfss->writeback_wait);
+
 	return ret;
 }
 
@@ -342,6 +401,7 @@ static int nfs_writepages_callback(struc
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
+	struct nfs_server *nfss = NFS_SERVER(inode);
 	unsigned long *bitlock = &NFS_I(inode)->flags;
 	struct nfs_pageio_descriptor pgio;
 	int err;
@@ -358,6 +418,10 @@ int nfs_writepages(struct address_space 
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
+	nfs_wait_congested(wbc->sync_mode == WB_SYNC_ALL,
+			   &nfss->backing_dev_info,
+			   nfss->writeback_wait);
+
 	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
 	smp_mb__after_clear_bit();
 	wake_up_bit(bitlock, NFS_INO_FLUSHING);
--- linux-next.orig/include/linux/nfs_fs_sb.h	2011-10-20 23:08:17.000000000 +0800
+++ linux-next/include/linux/nfs_fs_sb.h	2011-10-20 23:45:12.000000000 +0800
@@ -102,6 +102,7 @@ struct nfs_server {
 	struct nfs_iostats __percpu *io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
 	atomic_long_t		writeback;	/* number of writeback pages */
+	wait_queue_head_t	writeback_wait[2];
 	int			flags;		/* various flags */
 	unsigned int		caps;		/* server capabilities */
 	unsigned int		rsize;		/* read size */
--- linux-next.orig/fs/nfs/client.c	2011-10-20 23:08:17.000000000 +0800
+++ linux-next/fs/nfs/client.c	2011-10-20 23:45:12.000000000 +0800
@@ -1066,6 +1066,8 @@ static struct nfs_server *nfs_alloc_serv
 	INIT_LIST_HEAD(&server->layouts);
 
 	atomic_set(&server->active, 0);
+	init_waitqueue_head(&server->writeback_wait[BLK_RW_SYNC]);
+	init_waitqueue_head(&server->writeback_wait[BLK_RW_ASYNC]);
 
 	server->io_stats = nfs_alloc_iostats();
 	if (!server->io_stats) {

* [PATCH 2/2] nfs: scale writeback threshold proportional to dirty threshold
  2011-10-20 15:55 ` Wu Fengguang
@ 2011-10-20 15:56   ` Wu Fengguang
  -1 siblings, 0 replies; 9+ messages in thread
From: Wu Fengguang @ 2011-10-20 15:56 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs
  Cc: Peter Zijlstra, linux-fsdevel, Andrew Morton, Jan Kara,
	Christoph Hellwig, Dave Chinner, Greg Thelen, Minchan Kim,
	Vivek Goyal, Andrea Righi, linux-mm, LKML, Tang Feng

nfs_congestion_kb controls the maximum number of allowed writeback and
in-commit pages. It's not reasonable for these to outnumber the dirty
and to-commit pages, so each of them should take no more than 1/4 of
the dirty threshold.

Considering that nfs_init_writepagecache() is called at boot time, when
dirty_thresh is much higher than the real dirty limit seen after user
space has consumed lots of memory, use 1/8 instead.

Feng: fix a deadlock by preventing (nfs_congestion_kb == 0)
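
As a worked example (assuming 4KB pages, i.e. PAGE_SHIFT = 12, the
default 20% dirty ratio and 16GB of dirtyable memory; the numbers are
purely illustrative):

	dirty_thresh      = 20% * 16GB / 4KB      = 838860 pages
	dirty_thresh (KB) = 838860 << (12 - 10)   = 3355440 KB
	nfs_congestion_kb = (3355440 + 1024) / 8  = 419558 KB  (~410MB)

The "+= 1024" also keeps nfs_congestion_kb at no less than 128KB even
when dirty_thresh evaluates to 0, which is what prevents the
(nfs_congestion_kb == 0) deadlock mentioned above.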

CC: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 fs/nfs/write.c      |   36 +++++++++++++++++-------------------
 mm/page-writeback.c |    6 ++++++
 2 files changed, 23 insertions(+), 19 deletions(-)

--- linux-next.orig/fs/nfs/write.c	2011-10-20 23:45:59.000000000 +0800
+++ linux-next/fs/nfs/write.c	2011-10-20 23:53:16.000000000 +0800
@@ -1782,6 +1782,22 @@ out:
 }
 #endif
 
+void nfs_update_congestion_thresh(void)
+{
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+
+	/*
+	 * Limit to 1/8 of the dirty threshold, so that writeback+in_commit
+	 * pages won't outnumber dirty+to_commit pages.
+	 */
+	global_dirty_limits(&background_thresh, &dirty_thresh);
+	dirty_thresh <<= PAGE_SHIFT - 10;
+	dirty_thresh += 1024;
+
+	nfs_congestion_kb = dirty_thresh / 8;
+}
+
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
@@ -1801,25 +1817,7 @@ int __init nfs_init_writepagecache(void)
 	if (nfs_commit_mempool == NULL)
 		return -ENOMEM;
 
-	/*
-	 * NFS congestion size, scale with available memory.
-	 *
-	 *  64MB:    8192k
-	 * 128MB:   11585k
-	 * 256MB:   16384k
-	 * 512MB:   23170k
-	 *   1GB:   32768k
-	 *   2GB:   46340k
-	 *   4GB:   65536k
-	 *   8GB:   92681k
-	 *  16GB:  131072k
-	 *
-	 * This allows larger machines to have larger/more transfers.
-	 * Limit the default to 256M
-	 */
-	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
-	if (nfs_congestion_kb > 256*1024)
-		nfs_congestion_kb = 256*1024;
+	nfs_update_congestion_thresh();
 
 	return 0;
 }
--- linux-next.orig/mm/page-writeback.c	2011-10-20 23:45:23.000000000 +0800
+++ linux-next/mm/page-writeback.c	2011-10-20 23:48:07.000000000 +0800
@@ -207,6 +207,10 @@ static int calc_period_shift(void)
 	return 2 + ilog2(dirty_total - 1);
 }
 
+void __weak nfs_update_congestion_thresh(void)
+{
+}
+
 /*
  * update the period when the dirty threshold changes.
  */
@@ -217,6 +221,7 @@ static void update_completion_period(voi
 	prop_change_shift(&vm_dirties, shift);
 
 	writeback_set_ratelimit();
+	nfs_update_congestion_thresh();
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
@@ -447,6 +452,7 @@ unsigned long bdi_dirty_limit(struct bac
 
 	return bdi_dirty;
 }
+EXPORT_SYMBOL_GPL(global_dirty_limits);
 
 /*
  * Dirty position control.

* Re: [PATCH 1/2] nfs: writeback pages wait queue
  2011-10-20 15:55 ` Wu Fengguang
@ 2011-10-20 16:05   ` Wu Fengguang
  -1 siblings, 0 replies; 9+ messages in thread
From: Wu Fengguang @ 2011-10-20 16:05 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs
  Cc: Peter Zijlstra, linux-fsdevel, Andrew Morton, Jan Kara,
	Christoph Hellwig, Dave Chinner, Greg Thelen, Minchan Kim,
	Vivek Goyal, Andrea Righi, linux-mm, LKML

Trond,

After applying these two patches, the IO-less patchset performs
45% better than the vanilla kernel, and the average commit size only
decreases by 16% in the common NFS-thresh=1G/nfs-1dd case :)

Thanks,
Fengguang

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                  354.65       +45.4%       515.48  TOTAL write_bw
                10498.00       +91.7%     20120.00  TOTAL nfs_nr_commits
               233013.00       +99.9%    465751.00  TOTAL nfs_nr_writes
                  895.47        +3.1%       923.62  TOTAL nfs_commit_size
                    5.71       -14.5%         4.88  TOTAL nfs_write_size
               108269.33       -84.3%     17003.69  TOTAL nfs_write_queue_time
                 1836.03       -34.4%      1204.27  TOTAL nfs_write_rtt_time
               110144.96       -83.5%     18220.96  TOTAL nfs_write_execute_time
                 2902.62       -88.6%       332.20  TOTAL nfs_commit_queue_time
                16282.75       -23.3%     12490.87  TOTAL nfs_commit_rtt_time
                19234.16       -33.3%     12833.00  TOTAL nfs_commit_execute_time

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                   21.85       +97.9%        43.23  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                   51.38       +42.6%        73.26  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                   28.81      +145.3%        70.68  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                   13.74       +57.1%        21.59  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                   29.11        -0.3%        29.02  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                   16.68       +90.5%        31.78  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                   48.88       +41.2%        69.01  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                   57.85       +32.7%        76.74  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                   47.13       +63.1%        76.87  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                    9.82       -33.0%         6.58  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                   13.72       -18.1%        11.24  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                   15.68       -65.0%         5.48  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                  354.65       +45.4%       515.48  TOTAL write_bw

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                  834.00      +224.2%      2704.00  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                  311.00      +144.1%       759.00  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                  282.00      +253.5%       997.00  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                 1387.00      +334.2%      6023.00  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                 1081.00      +280.3%      4111.00  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                  930.00      +368.0%      4352.00  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                  254.00      +108.7%       530.00  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                   38.00       +55.3%        59.00  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                   54.00       +96.3%       106.00  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                 1321.00       -74.9%       332.00  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                 1932.00       -99.1%        17.00  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                 2074.00       -93.7%       130.00  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                10498.00       +91.7%     20120.00  TOTAL nfs_nr_commits

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                28359.00       -39.2%     17230.00  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                22241.00      +550.6%    144695.00  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                24969.00       +27.8%     31900.00  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                21722.00       +38.2%     30030.00  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                11015.00       +28.2%     14117.00  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                17012.00      +217.7%     54039.00  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                25616.00        +3.1%     26403.00  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                24761.00      +177.5%     68702.00  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                29235.00       +37.1%     40089.00  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                12929.00       +21.6%     15720.00  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                 7683.00       +24.2%      9542.00  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                 7471.00       +77.8%     13284.00  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
               233013.00       +99.9%    465751.00  TOTAL nfs_nr_writes

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                    7.84       -38.6%         4.81  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                   49.58       -41.6%        28.94  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                   30.58       -30.5%        21.27  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                    2.99       -63.9%         1.08  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                    8.06       -73.8%         2.12  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                    5.33       -58.9%         2.19  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                   57.68       -32.1%        39.15  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                  465.15       -16.5%       388.43  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                  261.60       -16.4%       218.80  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                    2.25      +163.5%         5.93  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                    2.13     +9221.2%       198.29  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                    2.27      +455.3%        12.61  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                  895.47        +3.1%       923.62  TOTAL nfs_commit_size

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                    0.23      +227.7%         0.76  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                    0.69       -78.1%         0.15  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                    0.35       +92.4%         0.66  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                    0.19       +13.4%         0.22  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                    0.79       -22.2%         0.62  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                    0.29       -39.5%         0.18  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                    0.57       +37.4%         0.79  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                    0.71       -53.3%         0.33  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                    0.48       +19.7%         0.58  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                    0.23       -45.5%         0.13  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                    0.53       -34.0%         0.35  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                    0.63       -80.4%         0.12  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                    5.71       -14.5%         4.88  TOTAL nfs_write_size

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                 6544.25       -95.1%       321.04  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                 1064.82       +11.2%      1184.16  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                22801.48       -86.3%      3113.39  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                 1083.47       -99.8%         2.56  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                    3.82       -55.8%         1.69  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                 2840.08       -99.3%        20.09  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                20227.73       -96.6%       683.65  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                 2346.04      +274.0%      8774.87  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                50812.68       -94.3%      2901.88  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                  417.03       -99.9%         0.25  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                    1.70       -97.9%         0.04  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                  126.23       -99.9%         0.08  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
               108269.33       -84.3%     17003.69  TOTAL nfs_write_queue_time

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                  276.99       -41.1%       163.20  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                  106.71       -67.0%        35.21  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                   76.32       +13.4%        86.53  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                  335.96       -41.8%       195.49  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                   33.67       +70.7%        57.48  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                  159.03       -46.0%        85.80  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                  340.25       -67.6%       110.23  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                   47.88       +12.7%        53.96  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                  118.13       -53.8%        54.62  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                  223.24       -53.0%       104.83  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                   58.89       +12.8%        66.43  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                   58.97      +223.0%       190.49  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                 1836.03       -34.4%      1204.27  TOTAL nfs_write_rtt_time

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                 6821.43       -92.9%       484.70  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                 1173.98        +4.0%      1220.80  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                22878.44       -86.0%      3201.00  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                 1419.50       -86.0%       198.20  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                   37.72       +57.1%        59.27  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                 2999.22       -96.5%       106.41  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                20570.86       -96.1%       795.46  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                 2416.09      +265.6%      8832.81  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                50941.27       -94.2%      2960.10  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                  640.32       -83.6%       105.13  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                   60.78        +9.4%        66.49  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                  185.35        +2.8%       190.59  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
               110144.96       -83.5%     18220.96  TOTAL nfs_write_execute_time

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                   54.75       -89.4%         5.82  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                   88.26       -98.7%         1.12  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                   38.41       -92.1%         3.05  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                    7.59       -91.0%         0.68  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                    0.42       -93.3%         0.03  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                    2.57       -75.1%         0.64  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                  784.08       -93.8%        48.69  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                 1338.39       -81.4%       248.51  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                  586.69       -96.0%        23.32  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                    1.27       -84.3%         0.20  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                    0.02      +147.1%         0.06  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                    0.16       -41.3%         0.09  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                 2902.62       -88.6%       332.20  TOTAL nfs_commit_queue_time

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                  702.80        +8.2%       760.66  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                  538.99       -35.8%       346.08  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                  704.42       -37.1%       443.00  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                  228.96       -18.4%       186.78  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                  155.88       -54.6%        70.75  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                  169.51       -28.9%       120.53  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                 3791.44       -11.4%      3361.05  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                 4229.79       -17.8%      3476.80  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                 5534.04       -35.4%      3574.73  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                   96.34       -31.4%        66.11  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                   60.95       -35.5%        39.29  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                   69.64       -35.3%        45.08  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                16282.75       -23.3%     12490.87  TOTAL nfs_commit_rtt_time

      3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
------------------------  ------------------------
                  757.92        +1.2%       766.73  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                  627.36       -44.6%       347.25  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                  743.59       -40.0%       446.39  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                  236.73       -20.8%       187.57  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                  156.31       -54.7%        70.79  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                  172.16       -29.6%       121.26  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                 4579.56       -25.5%      3411.34  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                 5568.53       -33.1%      3725.49  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                 6163.54       -41.5%      3605.27  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                   97.67       -32.1%        66.35  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                   60.99       -35.5%        39.35  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                   69.82       -35.3%        45.20  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                19234.16       -33.3%     12833.00  TOTAL nfs_commit_execute_time

* Re: [PATCH 1/2] nfs: writeback pages wait queue
  2011-10-20 16:05   ` Wu Fengguang
@ 2011-10-22  7:34   ` Wu Fengguang
  -1 siblings, 0 replies; 9+ messages in thread
From: Wu Fengguang @ 2011-10-22  7:34 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs
  Cc: Peter Zijlstra, linux-fsdevel, Andrew Morton, Jan Kara,
	Christoph Hellwig, Dave Chinner, Greg Thelen, Minchan Kim,
	Vivek Goyal, Andrea Righi, linux-mm, LKML

[-- Attachment #1: Type: text/plain, Size: 2115 bytes --]

Hi,

Aside from the big improvements in the 1G, 100M and 10M cases, the
thresh=1M cases show big regressions, but for a good reason.

>       3.1.0-rc8-vanilla+        3.1.0-rc8-nfs-wq4+
> ------------------------  ------------------------
>                    21.85       +97.9%        43.23  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
>                    51.38       +42.6%        73.26  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
>                    28.81      +145.3%        70.68  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
>                    13.74       +57.1%        21.59  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
>                    29.11        -0.3%        29.02  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
>                    16.68       +90.5%        31.78  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
>                    48.88       +41.2%        69.01  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
>                    57.85       +32.7%        76.74  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
>                    47.13       +63.1%        76.87  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
>                     9.82       -33.0%         6.58  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
>                    13.72       -18.1%        11.24  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
>                    15.68       -65.0%         5.48  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
>                   354.65       +45.4%       515.48  TOTAL write_bw

The regressions in the thresh=1M cases are plausibly explained by the
much reduced number of dirty/writeback/unstable pages.

The attached graphs for the NFS-thresh=1M/nfs-10dd cases show the
difference: the vanilla kernel (first graph) is much more permissive
in allowing dirty pages to exceed the global dirty limit, while the
IO-less kernel enforces that limit much more rigidly.

Given the 18MB vs. 3MB maximum of dirty+writeback+unstable pages, it is
not surprising that the vanilla kernel performs better here; it does
not indicate anything inherently wrong in the IO-less logic.

Thanks,
Fengguang

[-- Attachment #2: global_dirty_state.png --]
[-- Type: image/png, Size: 271327 bytes --]

[-- Attachment #3: global_dirty_state.png --]
[-- Type: image/png, Size: 189314 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] nfs: writeback pages wait queue
  2011-10-20 16:05   ` Wu Fengguang
  (?)
  (?)
@ 2011-10-23 15:54   ` Wu Fengguang
  2011-10-25 21:08     ` Wu Fengguang
  -1 siblings, 1 reply; 9+ messages in thread
From: Wu Fengguang @ 2011-10-23 15:54 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs
  Cc: Peter Zijlstra, linux-fsdevel, Andrew Morton, Jan Kara,
	Christoph Hellwig, Dave Chinner, Greg Thelen, Minchan Kim,
	Vivek Goyal, Andrea Righi, linux-mm, LKML

[-- Attachment #1: Type: text/plain, Size: 808 bytes --]

On Fri, Oct 21, 2011 at 12:05:30AM +0800, Wu Fengguang wrote:
> Trond,
> 
> After applying these two patches, the IO-less patchset performs
> 45% better than the vanilla kernel, while the average commit size
> only decreases by 16% in the common NFS-thresh=1G/nfs-1dd case :)

To better understand how the NFS writeback wait queue helps, I
visualized the network traffic over time. Attached are the graphs for
the vanilla kernel and the one with the IO-less + NFS wait queue
patches.

nfs-1dd-4k-32p-32016M-1024M:10-3.1.0-rc8-vanilla+/dstat-bw.png
nfs-1dd-4k-32p-31951M-1024M:10-3.1.0-rc8-nfs-wq4+/dstat-bw.png

The obvious difference is that the network traffic is now more evenly
distributed and the "zero traffic" periods are mostly eliminated.

The other 2dd, 10dd cases have similar results.

Thanks,
Fengguang

[-- Attachment #2: dstat-bw.png --]
[-- Type: image/png, Size: 23197 bytes --]

[-- Attachment #3: dstat-bw.png --]
[-- Type: image/png, Size: 22149 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] nfs: writeback pages wait queue
  2011-10-23 15:54   ` Wu Fengguang
@ 2011-10-25 21:08     ` Wu Fengguang
  0 siblings, 0 replies; 9+ messages in thread
From: Wu Fengguang @ 2011-10-25 21:08 UTC (permalink / raw)
  To: Trond Myklebust, linux-nfs
  Cc: Peter Zijlstra, linux-fsdevel, Andrew Morton, Jan Kara,
	Christoph Hellwig, Dave Chinner, Greg Thelen, Minchan Kim,
	Vivek Goyal, Andrea Righi, linux-mm, LKML

[-- Attachment #1: Type: text/plain, Size: 7283 bytes --]

On Sun, Oct 23, 2011 at 11:54:39PM +0800, Wu Fengguang wrote:
> On Fri, Oct 21, 2011 at 12:05:30AM +0800, Wu Fengguang wrote:
> > Trond,
> > 
> > After applying these two patches, the IO-less patchset performs
> > 45% better than the vanilla kernel, while the average commit size
> > only decreases by 16% in the common NFS-thresh=1G/nfs-1dd case :)
> 
> To better understand how the NFS writeback wait queue helps, I
> visualized the network traffic over time. Attached are the graphs for
> the vanilla kernel and the one with the IO-less + NFS wait queue
> patches.
> 
> nfs-1dd-4k-32p-32016M-1024M:10-3.1.0-rc8-vanilla+/dstat-bw.png
> nfs-1dd-4k-32p-31951M-1024M:10-3.1.0-rc8-nfs-wq4+/dstat-bw.png
> 
> The obvious difference is that the network traffic is now more evenly
> distributed and the "zero traffic" periods are mostly eliminated.
> 
> The other 2dd, 10dd cases have similar results.

To better explore the NFS server's internal state and to make it clear
where the performance gain comes from, I collected dstat data on the
NFS server and visualized the network/disk throughput over time. The
results are attached for the following test cases.

nfs-1dd-4k-32p-32016M-1024M:10-3.1.0-rc10-vanilla+
nfs-1dd-4k-32p-31951M-1024M:10-3.1.0-rc9-ioless-full-nfs-wq5-next-20111014+

In the above cases, the vanilla kernel shows roughly _interleaved_
disk/net activity. After the patches, the disk/net pipeline is well
established, hence the impressive performance improvements.

     3.1.0-rc10-vanilla+  3.1.0-rc9-ioless-full-nfs-wq5-next-20111014+  
------------------------  ------------------------  
                   57.48       +25.5%        72.11  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                   63.86       +27.1%        81.15  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                   47.51       +80.1%        85.54  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                  168.85       +41.4%       238.80  TOTAL write_bw

          3.1.0-vanilla+  3.1.0-rc9-ioless-full-nfs-wq5-next-20111014+
------------------------  ------------------------
                   21.93       +97.0%        43.21  NFS-thresh=100M/nfs-10dd-4k-32p-32768M-100M:10-X
                   55.91       +27.4%        71.23  NFS-thresh=100M/nfs-1dd-4k-32p-32768M-100M:10-X
                   26.73      +129.2%        61.25  NFS-thresh=100M/nfs-2dd-4k-32p-32768M-100M:10-X
                   15.66       +40.3%        21.96  NFS-thresh=10M/nfs-10dd-4k-32p-32768M-10M:10-X
                   29.01        +0.7%        29.20  NFS-thresh=10M/nfs-1dd-4k-32p-32768M-10M:10-X
                   20.52        +7.9%        22.13  NFS-thresh=10M/nfs-2dd-4k-32p-32768M-10M:10-X
                   56.30       +28.1%        72.11  NFS-thresh=1G/nfs-10dd-4k-32p-32768M-1024M:10-X
                   63.33       +28.1%        81.15  NFS-thresh=1G/nfs-1dd-4k-32p-32768M-1024M:10-X
                   51.95       +64.6%        85.54  NFS-thresh=1G/nfs-2dd-4k-32p-32768M-1024M:10-X
                    9.87       -48.2%         5.11  NFS-thresh=1M/nfs-10dd-4k-32p-32768M-1M:10-X
                   14.52       -24.0%        11.03  NFS-thresh=1M/nfs-1dd-4k-32p-32768M-1M:10-X
                   15.69       -67.4%         5.12  NFS-thresh=1M/nfs-2dd-4k-32p-32768M-1M:10-X
                  381.42       +33.5%       509.05  TOTAL write_bw

The vanilla kernel's disk idle periods indicate that the background
flusher work is not running, which is indeed the case. The NFS server
has 12GB of memory and hence a 1.2GB background dirty threshold. When
the client side's dirty threshold is <= 1GB, it will not send enough
data to trigger the background work on the NFS server, so the server's
disk IO is triggered mostly by NFS commits. The patched kernel appears
to generate a smoother commit pattern that avoids most of the disk
idles. A minimal sketch of the threshold arithmetic is below.
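
Here is a small userspace sketch of that arithmetic. It assumes the
default vm.dirty_background_ratio of 10% and treats all of the server's
RAM as dirtyable; the kernel's real calculation also subtracts
non-dirtyable memory, so the actual threshold is somewhat lower.

/* Hypothetical helper, not part of the patches. */
#include <stdio.h>

int main(void)
{
	unsigned long server_mem_mb = 12 * 1024; /* 12GB NFS server RAM */
	unsigned long bg_ratio = 10;    /* default dirty_background_ratio */
	unsigned long bg_thresh_mb = server_mem_mb * bg_ratio / 100;

	/*
	 * ~1228MB, i.e. ~1.2GB.  A client throttled at a <= 1GB dirty
	 * thresh can never accumulate that much on the server, so the
	 * background flusher stays idle and disk IO waits for commits.
	 */
	printf("server background dirty thresh: ~%lu MB\n", bg_thresh_mb);
	return 0;
}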

To explore how it performs when the NFS server's background writeback
kicks in, I further tried lowering the NFS server's dirty thresholds
to

dirty_bytes = 1GB, background threshold = 512MB
-----------------------------------------------
graphs attached for cases:
        nfs-1dd-1M-32p-32016M-1024M:10-3.1.0-vanilla+
        nfs-1dd-1M-32p-31951M-1024M:10-3.1.0-rc9-ioless-full-nfs-wq5-next-20111014+
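
These thresholds map onto the vm.dirty_bytes and
vm.dirty_background_bytes sysctls. As an illustration (not necessarily
the exact commands used for this run), they can be applied with
something like:

/* Illustrative only: equivalent to sysctl -w vm.dirty_bytes=... */
#include <stdio.h>

static int write_sysctl(const char *path, unsigned long long val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%llu\n", val);
	return fclose(f);
}

int main(void)
{
	/* 1GB hard dirty limit; note this clears vm.dirty_ratio */
	write_sysctl("/proc/sys/vm/dirty_bytes", 1ULL << 30);
	/* 512MB background threshold; clears vm.dirty_background_ratio */
	write_sysctl("/proc/sys/vm/dirty_background_bytes", 512ULL << 20);
	return 0;
}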

          3.1.0-vanilla+  3.1.0-rc9-ioless-full-nfs-wq5-next-20111014+
------------------------  ------------------------
                   25.23       +71.2%        43.21  NFS-thresh=100M/nfs-10dd-1M-32p-32768M-100M:10-X
                   60.21       +15.1%        69.29  NFS-thresh=100M/nfs-1dd-1M-32p-32768M-100M:10-X
                   33.46       +82.8%        61.18  NFS-thresh=100M/nfs-2dd-1M-32p-32768M-100M:10-X
                   21.81        +2.8%        22.42  NFS-thresh=10M/nfs-10dd-1M-32p-32768M-10M:10-X
                   28.71       -11.0%        25.55  NFS-thresh=10M/nfs-1dd-1M-32p-32768M-10M:10-X
                   28.11       -13.0%        24.46  NFS-thresh=10M/nfs-2dd-1M-32p-32768M-10M:10-X
                   54.10       +42.3%        76.97  NFS-thresh=1G/nfs-10dd-1M-32p-32768M-1024M:10-X
                   64.88       +29.1%        83.76  NFS-thresh=1G/nfs-1dd-1M-32p-32768M-1024M:10-X
                   53.64       +56.2%        83.79  NFS-thresh=1G/nfs-2dd-1M-32p-32768M-1024M:10-X
                   13.00       -61.0%         5.06  NFS-thresh=1M/nfs-10dd-1M-32p-32768M-1M:10-X
                   13.53       -33.9%         8.95  NFS-thresh=1M/nfs-1dd-1M-32p-32768M-1M:10-X
                   16.51       -72.5%         4.54  NFS-thresh=1M/nfs-2dd-1M-32p-32768M-1M:10-X
                  413.20       +23.2%       509.18  TOTAL write_bw

dirty_bytes = 100MB, background threshold = 50MB
------------------------------------------------
graphs attached for cases:
        nfs-1dd-100k-32p-32016M-1024M:10-3.1.0-vanilla+
        nfs-1dd-100k-32p-31951M-1024M:10-3.1.0-rc9-ioless-full-nfs-wq5-next-20111014+

          3.1.0-vanilla+  3.1.0-rc9-ioless-full-nfs-wq5-next-20111014+
------------------------  ------------------------
                   24.93       +71.1%        42.67  NFS-thresh=100M/nfs-10dd-100k-32p-32768M-100M:10-X
                   58.35       +23.0%        71.78  NFS-thresh=100M/nfs-1dd-100k-32p-32768M-100M:10-X
                   28.47      +123.0%        63.48  NFS-thresh=100M/nfs-2dd-100k-32p-32768M-100M:10-X
                   18.44       +22.5%        22.60  NFS-thresh=10M/nfs-10dd-100k-32p-32768M-10M:10-X
                   28.75        +0.1%        28.78  NFS-thresh=10M/nfs-1dd-100k-32p-32768M-10M:10-X
                   20.84       +12.0%        23.34  NFS-thresh=10M/nfs-2dd-100k-32p-32768M-10M:10-X
                   71.78        +6.4%        76.38  NFS-thresh=1G/nfs-10dd-100k-32p-32768M-1024M:10-X
                   91.02        -4.7%        86.72  NFS-thresh=1G/nfs-1dd-100k-32p-32768M-1024M:10-X
                   78.84        +9.9%        86.64  NFS-thresh=1G/nfs-2dd-100k-32p-32768M-1024M:10-X
                  421.42       +19.2%       502.39  TOTAL write_bw

The conclusions are:

- lowering the NFS server's dirty threshold helps the vanilla kernel's
  performance

- when the NFS server's background writeback is not running, the
  patches help establish a good net/disk pipeline and improve
  performance; otherwise they maintain roughly the same performance

Thanks,
Fengguang

[-- Attachment #2: dstat-nfss-bw.png --]
[-- Type: image/png, Size: 58596 bytes --]

[-- Attachment #3: dstat-nfss-bw.png --]
[-- Type: image/png, Size: 80812 bytes --]

[-- Attachment #4: dstat-nfss-bw.png --]
[-- Type: image/png, Size: 57489 bytes --]

[-- Attachment #5: dstat-nfss-bw.png --]
[-- Type: image/png, Size: 72627 bytes --]

[-- Attachment #6: dstat-nfss-bw.png --]
[-- Type: image/png, Size: 63146 bytes --]

[-- Attachment #7: dstat-nfss-bw.png --]
[-- Type: image/png, Size: 70136 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2011-10-25 21:09 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-20 15:55 [PATCH 1/2] nfs: writeback pages wait queue Wu Fengguang
2011-10-20 15:55 ` Wu Fengguang
2011-10-20 15:56 ` [PATCH 2/2] nfs: scale writeback threshold proportional to dirty threshold Wu Fengguang
2011-10-20 15:56   ` Wu Fengguang
2011-10-20 16:05 ` [PATCH 1/2] nfs: writeback pages wait queue Wu Fengguang
2011-10-20 16:05   ` Wu Fengguang
2011-10-22  7:34   ` Wu Fengguang
2011-10-23 15:54   ` Wu Fengguang
2011-10-25 21:08     ` Wu Fengguang
