From: Dave Hansen <haveblue@us.ibm.com> To: Andrew Morton <akpm@zip.com.au> Cc: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>, William Lee Irwin III <wli@holomorphy.com>, linux-kernel@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH] per-zone kswapd process Date: Thu, 12 Sep 2002 20:33:32 -0700 [thread overview] Message-ID: <3D815C8C.4050000@us.ibm.com> (raw) [-- Attachment #1: Type: text/plain, Size: 1119 bytes --] This patch implements a kswapd process for each memory zone. The original code came from Bill Irwin, but the current VM is quite a bit different from the one that he wrote it for, so not much remains. The current kswapd interface is much simpler than before because there is a single waitqueue and there is a single place where it is emptied. kswapd_can_sleep() and kswapd_balance() are simpler now that the extra pgdat level of indirection is gone. Tested on 8-way PIII with highmem off and then 4GB support. With 4GB support, I did 20 parallel greps through a 10GB fileset while some other processes allocated and freed 1-2GB chunks of memory. That gave kswapd a good workout, and I observed it running the zone Highmem and zone Normal kswapd threads. So, it survives my torture test. It also removes more code than it adds. include/linux/mmzone.h | 2 + include/linux/swap.h | 1 mm/page_alloc.c | 11 +++++- mm/vmscan.c | 88 +++++++++++++++++-------------------------------- 4 files changed, 42 insertions(+), 60 deletions(-) -- Dave Hansen haveblue@us.ibm.com [-- Attachment #2: per-zone-kswapd-2.5.34-mm2-3.patch --] [-- Type: text/plain, Size: 6092 bytes --] # This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. 
# This patch includes the following deltas: # ChangeSet 1.625 -> 1.628 # include/linux/mmzone.h 1.19 -> 1.20 # include/linux/swap.h 1.57 -> 1.58 # mm/page_alloc.c 1.98 -> 1.101 # mm/vmscan.c 1.102 -> 1.105 # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 02/09/12 haveblue@elm3b96.(none) 1.626 # add per-zone kswapd # -------------------------------------------- # 02/09/12 haveblue@elm3b96.(none) 1.627 # fix some wli-indicated formatting bits # -------------------------------------------- # 02/09/12 haveblue@elm3b96.(none) 1.628 # move waitqueue init to a more appropriate place # -------------------------------------------- # diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h --- a/include/linux/mmzone.h Thu Sep 12 20:24:39 2002 +++ b/include/linux/mmzone.h Thu Sep 12 20:24:39 2002 @@ -108,6 +108,8 @@ unsigned long wait_table_size; unsigned long wait_table_bits; + wait_queue_head_t kswapd_wait; + /* * Discontig memory support fields. */ diff -Nru a/include/linux/swap.h b/include/linux/swap.h --- a/include/linux/swap.h Thu Sep 12 20:24:39 2002 +++ b/include/linux/swap.h Thu Sep 12 20:24:39 2002 @@ -162,7 +162,6 @@ extern void swap_setup(void); /* linux/mm/vmscan.c */ -extern wait_queue_head_t kswapd_wait; extern int try_to_free_pages(struct zone *, unsigned int, unsigned int); /* linux/mm/page_io.c */ diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c --- a/mm/page_alloc.c Thu Sep 12 20:24:39 2002 +++ b/mm/page_alloc.c Thu Sep 12 20:24:39 2002 @@ -345,8 +345,15 @@ classzone->need_balance = 1; mb(); /* we're somewhat low on memory, failed to find what we needed */ - if (waitqueue_active(&kswapd_wait)) - wake_up_interruptible(&kswapd_wait); + for (i = 0; zones[i] != NULL; i++) { + struct zone *z = zones[i]; + + /* We don't want to go swapping on zones that aren't actually + * low. 
This accounts for "incremental min" from last loop */ + if (z->free_pages <= z->pages_low && + waitqueue_active(&z->kswapd_wait)) + wake_up_interruptible(&z->kswapd_wait); + } /* Go through the zonelist again, taking __GFP_HIGH into account */ min = 1UL << order; @@ -874,6 +881,8 @@ for(i = 0; i < zone->wait_table_size; ++i) init_waitqueue_head(zone->wait_table + i); + init_waitqueue_head(&zone->kswapd_wait); + pgdat->nr_zones = j+1; mask = (realsize / zone_balance_ratio[j]); diff -Nru a/mm/vmscan.c b/mm/vmscan.c --- a/mm/vmscan.c Thu Sep 12 20:24:39 2002 +++ b/mm/vmscan.c Thu Sep 12 20:24:39 2002 @@ -713,8 +713,6 @@ return 0; } -DECLARE_WAIT_QUEUE_HEAD(kswapd_wait); - static int check_classzone_need_balance(struct zone *classzone) { struct zone *first_classzone; @@ -728,71 +726,33 @@ return 1; } -static int kswapd_balance_pgdat(pg_data_t * pgdat) +static int kswapd_balance_zone(struct zone *zone) { - int need_more_balance = 0, i; - struct zone *zone; - - for (i = pgdat->nr_zones-1; i >= 0; i--) { - zone = pgdat->node_zones + i; + int need_more_balance = 0; + + do { cond_resched(); if (!zone->need_balance) - continue; + break; if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) { zone->need_balance = 0; __set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ); - continue; + break; } if (check_classzone_need_balance(zone)) need_more_balance = 1; else zone->need_balance = 0; - } - - return need_more_balance; -} - -static void kswapd_balance(void) -{ - int need_more_balance; - pg_data_t * pgdat; - - do { - need_more_balance = 0; - pgdat = pgdat_list; - do - need_more_balance |= kswapd_balance_pgdat(pgdat); - while ((pgdat = pgdat->pgdat_next)); } while (need_more_balance); -} -static int kswapd_can_sleep_pgdat(pg_data_t * pgdat) -{ - struct zone *zone; - int i; - - for (i = pgdat->nr_zones-1; i >= 0; i--) { - zone = pgdat->node_zones + i; - if (!zone->need_balance) - continue; - return 0; - } - - return 1; + return 0; } -static int kswapd_can_sleep(void) +static int 
kswapd_can_sleep_zone(struct zone *zone) { - pg_data_t * pgdat; - - pgdat = pgdat_list; - do { - if (kswapd_can_sleep_pgdat(pgdat)) - continue; - return 0; - } while ((pgdat = pgdat->pgdat_next)); - + if (zone->need_balance) + return 0; return 1; } @@ -809,13 +769,18 @@ * If there are applications that are active memory-allocators * (most normal use), this basically shouldn't matter. */ -int kswapd(void *unused) +int kswapd_zone(void *p) { + struct zone *zone = (struct zone *)p; struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); + + printk( "kswapd%d starting for %s\n", + zone - zone->zone_pgdat->node_zones, + zone->name); daemonize(); - strcpy(tsk->comm, "kswapd"); + sprintf(tsk->comm, "kswapd%d", zone - zone->zone_pgdat->node_zones); sigfillset(&tsk->blocked); /* @@ -839,30 +804,37 @@ if (current->flags & PF_FREEZE) refrigerator(PF_IOTHREAD); __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kswapd_wait, &wait); + add_wait_queue(&zone->kswapd_wait, &wait); mb(); - if (kswapd_can_sleep()) + if (kswapd_can_sleep_zone(zone)) schedule(); __set_current_state(TASK_RUNNING); - remove_wait_queue(&kswapd_wait, &wait); + remove_wait_queue(&zone->kswapd_wait, &wait); /* * If we actually get into a low-memory situation, * the processes needing more memory will wake us * up on a more timely basis. */ - kswapd_balance(); + kswapd_balance_zone(zone); blk_run_queues(); } } static int __init kswapd_init(void) { + struct zone* zone; + printk("Starting kswapd\n"); swap_setup(); - kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL); + for_each_zone(zone) + if (zone->size) + kernel_thread(kswapd_zone, + zone, + CLONE_FS | CLONE_FILES | CLONE_SIGNAL); + return 0; }
next reply other threads:[~2002-09-13 3:30 UTC|newest] Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top 2002-09-13 3:33 Dave Hansen [this message] 2002-09-13 4:06 ` Andrew Morton 2002-09-13 4:59 ` William Lee Irwin III 2002-09-13 5:10 ` Martin J. Bligh [not found] ` <3D8232DE.9090000@us.ibm.com> [not found] ` <3D823702.8E29AB4F@digeo.com> [not found] ` <3D8251D6.3060704@us.ibm.com> [not found] ` <3D82566B.EB2939D5@digeo.com> 2002-09-13 22:52 ` [PATCH] per-zone^Wnode " Dave Hansen 2002-09-13 23:24 ` Matthew Dobson 2002-09-13 23:29 ` Matthew Dobson 2002-09-13 23:46 ` William Lee Irwin III 2002-09-14 0:02 ` Andrew Morton 2002-09-14 0:12 ` William Lee Irwin III 2002-09-14 1:19 ` Andrew Morton 2002-09-13 5:46 ` [PATCH] per-zone " Andrew Morton 2002-09-13 5:38 ` Martin J. Bligh 2002-09-13 6:03 ` Andrew Morton 2002-09-13 13:05 ` Alan Cox 2002-09-13 21:30 ` William Lee Irwin III 2002-09-18 16:07 ` [PATCH] recognize MAP_LOCKED in mmap() call Hubertus Franke 2002-09-18 16:29 ` Andrew Morton 2002-09-16 5:44 ` [PATCH] per-zone kswapd process Daniel Phillips 2002-09-16 7:46 ` William Lee Irwin III 2002-09-16 15:12 ` Rik van Riel
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=3D815C8C.4050000@us.ibm.com \ --to=haveblue@us.ibm.com \ --cc=Martin.Bligh@us.ibm.com \ --cc=akpm@zip.com.au \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=wli@holomorphy.com \ --subject='Re: [PATCH] per-zone kswapd process' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).