All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: miklos@szeredi.hu, akpm@linux-foundation.org, neilb@suse.de,
	dgc@sgi.com, tomoki.sekiyama.qu@hitachi.com,
	a.p.zijlstra@chello.nl, nikita@clusterfs.com,
	trond.myklebust@fys.uio.no, yingchao.zhou@gmail.com,
	richard@rsk.demon.co.uk, torvalds@linux-foundation.org
Subject: [PATCH 22/23] mm: dirty balancing for tasks
Date: Tue, 11 Sep 2007 21:54:12 +0200	[thread overview]
Message-ID: <20070911200015.858159000@chello.nl> (raw)
In-Reply-To: 20070911195350.825778000@chello.nl

[-- Attachment #1: dirty_pages2.patch --]
[-- Type: text/plain, Size: 5673 bytes --]

Based on ideas of Andrew:
  http://marc.info/?l=linux-kernel&m=102912915020543&w=2

Scale the bdi dirty limit inversely with the task's dirty rate.
This makes heavy writers have a lower dirty limit than the occasional writer.

Andrea proposed something similar:
  http://lwn.net/Articles/152277/

The main disadvantage to his patch is that he uses an unrelated quantity to
measure time, which leaves him with a workload-dependent tunable. Other than
that the two approaches appear quite similar.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---

Changes since -v8:

 - initialized init_task
 - moved the prop_local init after the task_struct copy
 - changed the per task ratio to 1/8 (from 1/2).
 - explicit usage of prop_local_single

 include/linux/init_task.h |    1 
 include/linux/sched.h     |    2 +
 kernel/fork.c             |   10 +++++++++
 mm/page-writeback.c       |   50 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 62 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -84,6 +84,7 @@ struct sched_param {
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
+#include <linux/proportions.h>
 
 #include <asm/processor.h>
 
@@ -1125,6 +1126,7 @@ struct task_struct {
 #ifdef CONFIG_FAULT_INJECTION
 	int make_it_fail;
 #endif
+	struct prop_local_single dirties;
 };
 
 /*
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -107,6 +107,7 @@ static struct kmem_cache *mm_cachep;
 
 void free_task(struct task_struct *tsk)
 {
+	prop_local_destroy_single(&tsk->dirties);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	free_task_struct(tsk);
@@ -163,6 +164,7 @@ static struct task_struct *dup_task_stru
 {
 	struct task_struct *tsk;
 	struct thread_info *ti;
+	int err;
 
 	prepare_to_copy(orig);
 
@@ -178,6 +180,14 @@ static struct task_struct *dup_task_stru
 
 	*tsk = *orig;
 	tsk->stack = ti;
+
+	err = prop_local_init_single(&tsk->dirties);
+	if (err) {
+		free_thread_info(ti);
+		free_task_struct(tsk);
+		return NULL;
+	}
+
 	setup_thread_stack(tsk, orig);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
Index: linux-2.6/mm/page-writeback.c
===================================================================
--- linux-2.6.orig/mm/page-writeback.c
+++ linux-2.6/mm/page-writeback.c
@@ -118,6 +118,7 @@ static void background_writeout(unsigned
  *
  */
 static struct prop_descriptor vm_completions;
+static struct prop_descriptor vm_dirties;
 
 static unsigned long determine_dirtyable_memory(void);
 
@@ -146,6 +147,7 @@ int dirty_ratio_handler(ctl_table *table
 	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
 		int shift = calc_period_shift();
 		prop_change_shift(&vm_completions, shift);
+		prop_change_shift(&vm_dirties, shift);
 	}
 	return ret;
 }
@@ -159,6 +161,11 @@ static inline void __bdi_writeout_inc(st
 	__prop_inc_percpu(&vm_completions, &bdi->completions);
 }
 
+static inline void task_dirty_inc(struct task_struct *tsk)
+{
+	prop_inc_single(&vm_dirties, &tsk->dirties);
+}
+
 /*
  * Obtain an accurate fraction of the BDI's portion.
  */
@@ -198,6 +205,37 @@ clip_bdi_dirty_limit(struct backing_dev_
 	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
 }
 
+static inline void task_dirties_fraction(struct task_struct *tsk,
+		long *numerator, long *denominator)
+{
+	prop_fraction_single(&vm_dirties, &tsk->dirties,
+				numerator, denominator);
+}
+
+/*
+ * scale the dirty limit
+ *
+ * task specific dirty limit:
+ *
+ *   dirty -= (dirty/8) * p_{t}
+ */
+void task_dirty_limit(struct task_struct *tsk, long *pdirty)
+{
+	long numerator, denominator;
+	long dirty = *pdirty;
+	long long inv = dirty >> 3;
+
+	task_dirties_fraction(tsk, &numerator, &denominator);
+	inv *= numerator;
+	do_div(inv, denominator);
+
+	dirty -= inv;
+	if (dirty < *pdirty/2)
+		dirty = *pdirty/2;
+
+	*pdirty = dirty;
+}
+
 /*
  * Work out the current dirty-memory clamping and background writeout
  * thresholds.
@@ -304,6 +342,7 @@ get_dirty_limits(long *pbackground, long
 
 		*pbdi_dirty = bdi_dirty;
 		clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
+		task_dirty_limit(current, pbdi_dirty);
 	}
 }
 
@@ -725,6 +764,7 @@ void __init page_writeback_init(void)
 
 	shift = calc_period_shift();
 	prop_descriptor_init(&vm_completions, shift);
+	prop_descriptor_init(&vm_dirties, shift);
 }
 
 /**
@@ -1003,7 +1043,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage
  * If the mapping doesn't provide a set_page_dirty a_op, then
  * just fall through and assume that it wants buffer_heads.
  */
-int fastcall set_page_dirty(struct page *page)
+static int __set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
@@ -1021,6 +1061,14 @@ int fastcall set_page_dirty(struct page 
 	}
 	return 0;
 }
+
+int fastcall set_page_dirty(struct page *page)
+{
+	int ret = __set_page_dirty(page);
+	if (ret)
+		task_dirty_inc(current);
+	return ret;
+}
 EXPORT_SYMBOL(set_page_dirty);
 
 /*
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -169,6 +169,7 @@ extern struct group_info init_groups;
 		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),		\
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
+	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 }

--


WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: miklos@szeredi.hu, akpm@linux-foundation.org, neilb@suse.de,
	dgc@sgi.com, tomoki.sekiyama.qu@hitachi.com,
	a.p.zijlstra@chello.nl, nikita@clusterfs.com,
	trond.myklebust@fys.uio.no, yingchao.zhou@gmail.com,
	richard@rsk.demon.co.uk, torvalds@linux-foundation.org
Subject: [PATCH 22/23] mm: dirty balancing for tasks
Date: Tue, 11 Sep 2007 21:54:12 +0200	[thread overview]
Message-ID: <20070911200015.858159000@chello.nl> (raw)
In-Reply-To: 20070911195350.825778000@chello.nl

[-- Attachment #1: dirty_pages2.patch --]
[-- Type: text/plain, Size: 5898 bytes --]

Based on ideas of Andrew:
  http://marc.info/?l=linux-kernel&m=102912915020543&w=2

Scale the bdi dirty limit inversely with the task's dirty rate.
This makes heavy writers have a lower dirty limit than the occasional writer.

Andrea proposed something similar:
  http://lwn.net/Articles/152277/

The main disadvantage to his patch is that he uses an unrelated quantity to
measure time, which leaves him with a workload-dependent tunable. Other than
that the two approaches appear quite similar.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---

Changes since -v8:

 - initialized init_task
 - moved the prop_local init after the task_struct copy
 - changed the per task ratio to 1/8 (from 1/2).
 - explicit usage of prop_local_single

 include/linux/init_task.h |    1 
 include/linux/sched.h     |    2 +
 kernel/fork.c             |   10 +++++++++
 mm/page-writeback.c       |   50 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 62 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -84,6 +84,7 @@ struct sched_param {
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
+#include <linux/proportions.h>
 
 #include <asm/processor.h>
 
@@ -1125,6 +1126,7 @@ struct task_struct {
 #ifdef CONFIG_FAULT_INJECTION
 	int make_it_fail;
 #endif
+	struct prop_local_single dirties;
 };
 
 /*
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -107,6 +107,7 @@ static struct kmem_cache *mm_cachep;
 
 void free_task(struct task_struct *tsk)
 {
+	prop_local_destroy_single(&tsk->dirties);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	free_task_struct(tsk);
@@ -163,6 +164,7 @@ static struct task_struct *dup_task_stru
 {
 	struct task_struct *tsk;
 	struct thread_info *ti;
+	int err;
 
 	prepare_to_copy(orig);
 
@@ -178,6 +180,14 @@ static struct task_struct *dup_task_stru
 
 	*tsk = *orig;
 	tsk->stack = ti;
+
+	err = prop_local_init_single(&tsk->dirties);
+	if (err) {
+		free_thread_info(ti);
+		free_task_struct(tsk);
+		return NULL;
+	}
+
 	setup_thread_stack(tsk, orig);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
Index: linux-2.6/mm/page-writeback.c
===================================================================
--- linux-2.6.orig/mm/page-writeback.c
+++ linux-2.6/mm/page-writeback.c
@@ -118,6 +118,7 @@ static void background_writeout(unsigned
  *
  */
 static struct prop_descriptor vm_completions;
+static struct prop_descriptor vm_dirties;
 
 static unsigned long determine_dirtyable_memory(void);
 
@@ -146,6 +147,7 @@ int dirty_ratio_handler(ctl_table *table
 	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
 		int shift = calc_period_shift();
 		prop_change_shift(&vm_completions, shift);
+		prop_change_shift(&vm_dirties, shift);
 	}
 	return ret;
 }
@@ -159,6 +161,11 @@ static inline void __bdi_writeout_inc(st
 	__prop_inc_percpu(&vm_completions, &bdi->completions);
 }
 
+static inline void task_dirty_inc(struct task_struct *tsk)
+{
+	prop_inc_single(&vm_dirties, &tsk->dirties);
+}
+
 /*
  * Obtain an accurate fraction of the BDI's portion.
  */
@@ -198,6 +205,37 @@ clip_bdi_dirty_limit(struct backing_dev_
 	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
 }
 
+static inline void task_dirties_fraction(struct task_struct *tsk,
+		long *numerator, long *denominator)
+{
+	prop_fraction_single(&vm_dirties, &tsk->dirties,
+				numerator, denominator);
+}
+
+/*
+ * scale the dirty limit
+ *
+ * task specific dirty limit:
+ *
+ *   dirty -= (dirty/8) * p_{t}
+ */
+void task_dirty_limit(struct task_struct *tsk, long *pdirty)
+{
+	long numerator, denominator;
+	long dirty = *pdirty;
+	long long inv = dirty >> 3;
+
+	task_dirties_fraction(tsk, &numerator, &denominator);
+	inv *= numerator;
+	do_div(inv, denominator);
+
+	dirty -= inv;
+	if (dirty < *pdirty/2)
+		dirty = *pdirty/2;
+
+	*pdirty = dirty;
+}
+
 /*
  * Work out the current dirty-memory clamping and background writeout
  * thresholds.
@@ -304,6 +342,7 @@ get_dirty_limits(long *pbackground, long
 
 		*pbdi_dirty = bdi_dirty;
 		clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
+		task_dirty_limit(current, pbdi_dirty);
 	}
 }
 
@@ -725,6 +764,7 @@ void __init page_writeback_init(void)
 
 	shift = calc_period_shift();
 	prop_descriptor_init(&vm_completions, shift);
+	prop_descriptor_init(&vm_dirties, shift);
 }
 
 /**
@@ -1003,7 +1043,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage
  * If the mapping doesn't provide a set_page_dirty a_op, then
  * just fall through and assume that it wants buffer_heads.
  */
-int fastcall set_page_dirty(struct page *page)
+static int __set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
@@ -1021,6 +1061,14 @@ int fastcall set_page_dirty(struct page 
 	}
 	return 0;
 }
+
+int fastcall set_page_dirty(struct page *page)
+{
+	int ret = __set_page_dirty(page);
+	if (ret)
+		task_dirty_inc(current);
+	return ret;
+}
 EXPORT_SYMBOL(set_page_dirty);
 
 /*
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -169,6 +169,7 @@ extern struct group_info init_groups;
 		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),		\
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
+	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 }

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2007-09-11 20:13 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-09-11 19:53 [PATCH 00/23] per device dirty throttling -v10 Peter Zijlstra
2007-09-11 19:53 ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 01/23] nfs: remove congestion_end() Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 02/23] lib: percpu_counter_add Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 03/23] lib: percpu_counter_sub Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 04/23] lib: percpu_counter variable batch Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 05/23] lib: make percpu_counter_add take s64 Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 06/23] lib: percpu_counter_set Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 07/23] lib: percpu_counter_sum_positive Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 08/23] lib: percpu_count_sum() Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:53 ` [PATCH 09/23] lib: percpu_counter_init error handling Peter Zijlstra
2007-09-11 19:53   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 10/23] lib: percpu_counter_init_irq Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 11/23] mm: bdi init hooks Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 12/23] containers: " Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 13/23] mtd: " Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 14/23] mtd: clean up the backing_dev_info usage Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 15/23] mtd: give mtdconcat devices their own backing_dev_info Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 16/23] mm: scalable bdi statistics counters Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 17/23] mm: count reclaimable pages per BDI Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 18/23] mm: count writeback " Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 19/23] mm: expose BDI statistics in sysfs Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 20/23] lib: floating proportions Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-11 19:54 ` [PATCH 21/23] mm: per device dirty threshold Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-12  2:36   ` John Stoffel
2007-09-12  2:36     ` John Stoffel
2007-09-12  8:45     ` Peter Zijlstra
2007-09-11 19:54 ` Peter Zijlstra [this message]
2007-09-11 19:54   ` [PATCH 22/23] mm: dirty balancing for tasks Peter Zijlstra
2007-09-11 19:54 ` [PATCH 23/23] debug: sysfs files for the current ratio/size/total Peter Zijlstra
2007-09-11 19:54   ` Peter Zijlstra
2007-09-12  2:31 ` [PATCH 00/23] per device dirty throttling -v10 John Stoffel
2007-09-12  2:31   ` John Stoffel
2007-09-12  9:00   ` Peter Zijlstra
  -- strict thread matches above, loose matches on Subject: below --
2007-08-16  7:45 [PATCH 00/23] per device dirty throttling -v9 Peter Zijlstra
2007-08-16  7:45 ` [PATCH 22/23] mm: dirty balancing for tasks Peter Zijlstra
2007-08-16  7:45   ` Peter Zijlstra
2007-08-03 12:37 [PATCH 00/23] per device dirty throttling -v8 Peter Zijlstra
2007-08-03 12:37 ` [PATCH 22/23] mm: dirty balancing for tasks Peter Zijlstra
2007-08-03 12:37   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070911200015.858159000@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=dgc@sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=miklos@szeredi.hu \
    --cc=neilb@suse.de \
    --cc=nikita@clusterfs.com \
    --cc=richard@rsk.demon.co.uk \
    --cc=tomoki.sekiyama.qu@hitachi.com \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@fys.uio.no \
    --cc=yingchao.zhou@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.