From: Peter Zijlstra <a.p.zijlstra@chello.nl> To: linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: miklos@szeredi.hu, akpm@linux-foundation.org, neilb@suse.de, dgc@sgi.com, tomoki.sekiyama.qu@hitachi.com, a.p.zijlstra@chello.nl, nikita@clusterfs.com, trond.myklebust@fys.uio.no, yingchao.zhou@gmail.com, richard@rsk.demon.co.uk, torvalds@linux-foundation.org Subject: [PATCH 22/23] mm: dirty balancing for tasks Date: Tue, 11 Sep 2007 21:54:12 +0200 [thread overview] Message-ID: <20070911200015.858159000@chello.nl> (raw) In-Reply-To: 20070911195350.825778000@chello.nl [-- Attachment #1: dirty_pages2.patch --] [-- Type: text/plain, Size: 5673 bytes --] Based on ideas of Andrew: http://marc.info/?l=linux-kernel&m=102912915020543&w=2 Scale the bdi dirty limit inversely with the task's dirty rate. This makes heavy writers have a lower dirty limit than the occasional writer. Andrea proposed something similar: http://lwn.net/Articles/152277/ The main disadvantage to his patch is that he uses an unrelated quantity to measure time, which leaves him with a workload-dependent tunable. Other than that the two approaches appear quite similar. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> --- Changes since -v8: - initialized init_task - moved the prop_local init after the task_struct copy - changed the per task ratio to 1/8 (from 1/2). 
- explicit usage of prop_local_single include/linux/init_task.h | 1 include/linux/sched.h | 2 + kernel/fork.c | 10 +++++++++ mm/page-writeback.c | 50 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 62 insertions(+), 1 deletion(-) Index: linux-2.6/include/linux/sched.h =================================================================== --- linux-2.6.orig/include/linux/sched.h +++ linux-2.6/include/linux/sched.h @@ -84,6 +84,7 @@ struct sched_param { #include <linux/timer.h> #include <linux/hrtimer.h> #include <linux/task_io_accounting.h> +#include <linux/proportions.h> #include <asm/processor.h> @@ -1125,6 +1126,7 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif + struct prop_local_single dirties; }; /* Index: linux-2.6/kernel/fork.c =================================================================== --- linux-2.6.orig/kernel/fork.c +++ linux-2.6/kernel/fork.c @@ -107,6 +107,7 @@ static struct kmem_cache *mm_cachep; void free_task(struct task_struct *tsk) { + prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); free_task_struct(tsk); @@ -163,6 +164,7 @@ static struct task_struct *dup_task_stru { struct task_struct *tsk; struct thread_info *ti; + int err; prepare_to_copy(orig); @@ -178,6 +180,14 @@ static struct task_struct *dup_task_stru *tsk = *orig; tsk->stack = ti; + + err = prop_local_init_single(&tsk->dirties); + if (err) { + free_thread_info(ti); + free_task_struct(tsk); + return NULL; + } + setup_thread_stack(tsk, orig); #ifdef CONFIG_CC_STACKPROTECTOR Index: linux-2.6/mm/page-writeback.c =================================================================== --- linux-2.6.orig/mm/page-writeback.c +++ linux-2.6/mm/page-writeback.c @@ -118,6 +118,7 @@ static void background_writeout(unsigned * */ static struct prop_descriptor vm_completions; +static struct prop_descriptor vm_dirties; static unsigned long determine_dirtyable_memory(void); @@ -146,6 +147,7 @@ int 
dirty_ratio_handler(ctl_table *table if (ret == 0 && write && vm_dirty_ratio != old_ratio) { int shift = calc_period_shift(); prop_change_shift(&vm_completions, shift); + prop_change_shift(&vm_dirties, shift); } return ret; } @@ -159,6 +161,11 @@ static inline void __bdi_writeout_inc(st __prop_inc_percpu(&vm_completions, &bdi->completions); } +static inline void task_dirty_inc(struct task_struct *tsk) +{ + prop_inc_single(&vm_dirties, &tsk->dirties); +} + /* * Obtain an accurate fraction of the BDI's portion. */ @@ -198,6 +205,37 @@ clip_bdi_dirty_limit(struct backing_dev_ *pbdi_dirty = min(*pbdi_dirty, avail_dirty); } +static inline void task_dirties_fraction(struct task_struct *tsk, + long *numerator, long *denominator) +{ + prop_fraction_single(&vm_dirties, &tsk->dirties, + numerator, denominator); +} + +/* + * scale the dirty limit + * + * task specific dirty limit: + * + * dirty -= (dirty/8) * p_{t} + */ +void task_dirty_limit(struct task_struct *tsk, long *pdirty) +{ + long numerator, denominator; + long dirty = *pdirty; + long long inv = dirty >> 3; + + task_dirties_fraction(tsk, &numerator, &denominator); + inv *= numerator; + do_div(inv, denominator); + + dirty -= inv; + if (dirty < *pdirty/2) + dirty = *pdirty/2; + + *pdirty = dirty; +} + /* * Work out the current dirty-memory clamping and background writeout * thresholds. @@ -304,6 +342,7 @@ get_dirty_limits(long *pbackground, long *pbdi_dirty = bdi_dirty; clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); + task_dirty_limit(current, pbdi_dirty); } } @@ -725,6 +764,7 @@ void __init page_writeback_init(void) shift = calc_period_shift(); prop_descriptor_init(&vm_completions, shift); + prop_descriptor_init(&vm_dirties, shift); } /** @@ -1003,7 +1043,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. 
*/ -int fastcall set_page_dirty(struct page *page) +static int __set_page_dirty(struct page *page) { struct address_space *mapping = page_mapping(page); @@ -1021,6 +1061,14 @@ int fastcall set_page_dirty(struct page } return 0; } + +int fastcall set_page_dirty(struct page *page) +{ + int ret = __set_page_dirty(page); + if (ret) + task_dirty_inc(current); + return ret; +} EXPORT_SYMBOL(set_page_dirty); /* Index: linux-2.6/include/linux/init_task.h =================================================================== --- linux-2.6.orig/include/linux/init_task.h +++ linux-2.6/include/linux/init_task.h @@ -169,6 +169,7 @@ extern struct group_info init_groups; [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ + .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ } --
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl> To: linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: miklos@szeredi.hu, akpm@linux-foundation.org, neilb@suse.de, dgc@sgi.com, tomoki.sekiyama.qu@hitachi.com, a.p.zijlstra@chello.nl, nikita@clusterfs.com, trond.myklebust@fys.uio.no, yingchao.zhou@gmail.com, richard@rsk.demon.co.uk, torvalds@linux-foundation.org Subject: [PATCH 22/23] mm: dirty balancing for tasks Date: Tue, 11 Sep 2007 21:54:12 +0200 [thread overview] Message-ID: <20070911200015.858159000@chello.nl> (raw) In-Reply-To: 20070911195350.825778000@chello.nl [-- Attachment #1: dirty_pages2.patch --] [-- Type: text/plain, Size: 5898 bytes --] Based on ideas of Andrew: http://marc.info/?l=linux-kernel&m=102912915020543&w=2 Scale the bdi dirty limit inversely with the task's dirty rate. This makes heavy writers have a lower dirty limit than the occasional writer. Andrea proposed something similar: http://lwn.net/Articles/152277/ The main disadvantage to his patch is that he uses an unrelated quantity to measure time, which leaves him with a workload-dependent tunable. Other than that the two approaches appear quite similar. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> --- Changes since -v8: - initialized init_task - moved the prop_local init after the task_struct copy - changed the per task ratio to 1/8 (from 1/2). 
- explicit usage of prop_local_single include/linux/init_task.h | 1 include/linux/sched.h | 2 + kernel/fork.c | 10 +++++++++ mm/page-writeback.c | 50 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 62 insertions(+), 1 deletion(-) Index: linux-2.6/include/linux/sched.h =================================================================== --- linux-2.6.orig/include/linux/sched.h +++ linux-2.6/include/linux/sched.h @@ -84,6 +84,7 @@ struct sched_param { #include <linux/timer.h> #include <linux/hrtimer.h> #include <linux/task_io_accounting.h> +#include <linux/proportions.h> #include <asm/processor.h> @@ -1125,6 +1126,7 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif + struct prop_local_single dirties; }; /* Index: linux-2.6/kernel/fork.c =================================================================== --- linux-2.6.orig/kernel/fork.c +++ linux-2.6/kernel/fork.c @@ -107,6 +107,7 @@ static struct kmem_cache *mm_cachep; void free_task(struct task_struct *tsk) { + prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); free_task_struct(tsk); @@ -163,6 +164,7 @@ static struct task_struct *dup_task_stru { struct task_struct *tsk; struct thread_info *ti; + int err; prepare_to_copy(orig); @@ -178,6 +180,14 @@ static struct task_struct *dup_task_stru *tsk = *orig; tsk->stack = ti; + + err = prop_local_init_single(&tsk->dirties); + if (err) { + free_thread_info(ti); + free_task_struct(tsk); + return NULL; + } + setup_thread_stack(tsk, orig); #ifdef CONFIG_CC_STACKPROTECTOR Index: linux-2.6/mm/page-writeback.c =================================================================== --- linux-2.6.orig/mm/page-writeback.c +++ linux-2.6/mm/page-writeback.c @@ -118,6 +118,7 @@ static void background_writeout(unsigned * */ static struct prop_descriptor vm_completions; +static struct prop_descriptor vm_dirties; static unsigned long determine_dirtyable_memory(void); @@ -146,6 +147,7 @@ int 
dirty_ratio_handler(ctl_table *table if (ret == 0 && write && vm_dirty_ratio != old_ratio) { int shift = calc_period_shift(); prop_change_shift(&vm_completions, shift); + prop_change_shift(&vm_dirties, shift); } return ret; } @@ -159,6 +161,11 @@ static inline void __bdi_writeout_inc(st __prop_inc_percpu(&vm_completions, &bdi->completions); } +static inline void task_dirty_inc(struct task_struct *tsk) +{ + prop_inc_single(&vm_dirties, &tsk->dirties); +} + /* * Obtain an accurate fraction of the BDI's portion. */ @@ -198,6 +205,37 @@ clip_bdi_dirty_limit(struct backing_dev_ *pbdi_dirty = min(*pbdi_dirty, avail_dirty); } +static inline void task_dirties_fraction(struct task_struct *tsk, + long *numerator, long *denominator) +{ + prop_fraction_single(&vm_dirties, &tsk->dirties, + numerator, denominator); +} + +/* + * scale the dirty limit + * + * task specific dirty limit: + * + * dirty -= (dirty/8) * p_{t} + */ +void task_dirty_limit(struct task_struct *tsk, long *pdirty) +{ + long numerator, denominator; + long dirty = *pdirty; + long long inv = dirty >> 3; + + task_dirties_fraction(tsk, &numerator, &denominator); + inv *= numerator; + do_div(inv, denominator); + + dirty -= inv; + if (dirty < *pdirty/2) + dirty = *pdirty/2; + + *pdirty = dirty; +} + /* * Work out the current dirty-memory clamping and background writeout * thresholds. @@ -304,6 +342,7 @@ get_dirty_limits(long *pbackground, long *pbdi_dirty = bdi_dirty; clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); + task_dirty_limit(current, pbdi_dirty); } } @@ -725,6 +764,7 @@ void __init page_writeback_init(void) shift = calc_period_shift(); prop_descriptor_init(&vm_completions, shift); + prop_descriptor_init(&vm_dirties, shift); } /** @@ -1003,7 +1043,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. 
*/ -int fastcall set_page_dirty(struct page *page) +static int __set_page_dirty(struct page *page) { struct address_space *mapping = page_mapping(page); @@ -1021,6 +1061,14 @@ int fastcall set_page_dirty(struct page } return 0; } + +int fastcall set_page_dirty(struct page *page) +{ + int ret = __set_page_dirty(page); + if (ret) + task_dirty_inc(current); + return ret; +} EXPORT_SYMBOL(set_page_dirty); /* Index: linux-2.6/include/linux/init_task.h =================================================================== --- linux-2.6.orig/include/linux/init_task.h +++ linux-2.6/include/linux/init_task.h @@ -169,6 +169,7 @@ extern struct group_info init_groups; [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ + .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ } -- -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2007-09-11 20:13 UTC|newest] Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top 2007-09-11 19:53 [PATCH 00/23] per device dirty throttling -v10 Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 01/23] nfs: remove congestion_end() Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 02/23] lib: percpu_counter_add Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 03/23] lib: percpu_counter_sub Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 04/23] lib: percpu_counter variable batch Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 05/23] lib: make percpu_counter_add take s64 Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 06/23] lib: percpu_counter_set Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 07/23] lib: percpu_counter_sum_positive Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 08/23] lib: percpu_count_sum() Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:53 ` [PATCH 09/23] lib: percpu_counter_init error handling Peter Zijlstra 2007-09-11 19:53 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 10/23] lib: percpu_counter_init_irq Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 11/23] mm: bdi init hooks Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 12/23] containers: " Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 13/23] mtd: " Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 14/23] mtd: clean up the backing_dev_info usage Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 15/23] mtd: give mtdconcat devices their own backing_dev_info Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 16/23] mm: scalable bdi statistics counters Peter 
Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 17/23] mm: count reclaimable pages per BDI Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 18/23] mm: count writeback " Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 19/23] mm: expose BDI statistics in sysfs Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 20/23] lib: floating proportions Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-11 19:54 ` [PATCH 21/23] mm: per device dirty threshold Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-12 2:36 ` John Stoffel 2007-09-12 2:36 ` John Stoffel 2007-09-12 8:45 ` Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra [this message] 2007-09-11 19:54 ` [PATCH 22/23] mm: dirty balancing for tasks Peter Zijlstra 2007-09-11 19:54 ` [PATCH 23/23] debug: sysfs files for the current ratio/size/total Peter Zijlstra 2007-09-11 19:54 ` Peter Zijlstra 2007-09-12 2:31 ` [PATCH 00/23] per device dirty throttling -v10 John Stoffel 2007-09-12 2:31 ` John Stoffel 2007-09-12 9:00 ` Peter Zijlstra -- strict thread matches above, loose matches on Subject: below -- 2007-08-16 7:45 [PATCH 00/23] per device dirty throttling -v9 Peter Zijlstra 2007-08-16 7:45 ` [PATCH 22/23] mm: dirty balancing for tasks Peter Zijlstra 2007-08-16 7:45 ` Peter Zijlstra 2007-08-03 12:37 [PATCH 00/23] per device dirty throttling -v8 Peter Zijlstra 2007-08-03 12:37 ` [PATCH 22/23] mm: dirty balancing for tasks Peter Zijlstra 2007-08-03 12:37 ` Peter Zijlstra
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20070911200015.858159000@chello.nl \ --to=a.p.zijlstra@chello.nl \ --cc=akpm@linux-foundation.org \ --cc=dgc@sgi.com \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=miklos@szeredi.hu \ --cc=neilb@suse.de \ --cc=nikita@clusterfs.com \ --cc=richard@rsk.demon.co.uk \ --cc=tomoki.sekiyama.qu@hitachi.com \ --cc=torvalds@linux-foundation.org \ --cc=trond.myklebust@fys.uio.no \ --cc=yingchao.zhou@gmail.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.