From: Dave Hansen <haveblue@us.ibm.com>
To: Andrew Morton <akpm@zip.com.au>
Cc: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>,
William Lee Irwin III <wli@holomorphy.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH] per-zone kswapd process
Date: Thu, 12 Sep 2002 20:33:32 -0700 [thread overview]
Message-ID: <3D815C8C.4050000@us.ibm.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1119 bytes --]
This patch implements a kswapd process for each memory zone. The original code
came from Bill Irwin, but the current VM is quite a bit different from the one
that he wrote it for, so not much remains. The current kswapd interface is much
simpler than before because there is a single waitqueue and a single place
where it is emptied.
kswapd_can_sleep() and kswapd_balance() are simpler now that the extra pgdat
level of indirection is gone.
Tested on an 8-way PIII, first with highmem off and then with 4GB support. With 4GB support, I
did 20 parallel greps through a 10GB fileset while some other processes
allocated and freed 1-2GB chunks of memory. That gave kswapd a good workout,
and I observed it running the zone Highmem and zone Normal kswapd threads. So,
it survives my torture test. It also removes more code than it adds.
include/linux/mmzone.h | 2 +
include/linux/swap.h | 1
mm/page_alloc.c | 11 +++++-
mm/vmscan.c | 88 +++++++++++++++++--------------------------------
4 files changed, 42 insertions(+), 60 deletions(-)
--
Dave Hansen
haveblue@us.ibm.com
[-- Attachment #2: per-zone-kswapd-2.5.34-mm2-3.patch --]
[-- Type: text/plain, Size: 6092 bytes --]
# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.625 -> 1.628
# include/linux/mmzone.h 1.19 -> 1.20
# include/linux/swap.h 1.57 -> 1.58
# mm/page_alloc.c 1.98 -> 1.101
# mm/vmscan.c 1.102 -> 1.105
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/09/12 haveblue@elm3b96.(none) 1.626
# add per-zone kswapd
# --------------------------------------------
# 02/09/12 haveblue@elm3b96.(none) 1.627
# fix some wli-indicated formatting bits
# --------------------------------------------
# 02/09/12 haveblue@elm3b96.(none) 1.628
# move waitqueue init to a more appropriate place
# --------------------------------------------
#
diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h
--- a/include/linux/mmzone.h Thu Sep 12 20:24:39 2002
+++ b/include/linux/mmzone.h Thu Sep 12 20:24:39 2002
@@ -108,6 +108,8 @@
unsigned long wait_table_size;
unsigned long wait_table_bits;
+ wait_queue_head_t kswapd_wait;
+
/*
* Discontig memory support fields.
*/
diff -Nru a/include/linux/swap.h b/include/linux/swap.h
--- a/include/linux/swap.h Thu Sep 12 20:24:39 2002
+++ b/include/linux/swap.h Thu Sep 12 20:24:39 2002
@@ -162,7 +162,6 @@
extern void swap_setup(void);
/* linux/mm/vmscan.c */
-extern wait_queue_head_t kswapd_wait;
extern int try_to_free_pages(struct zone *, unsigned int, unsigned int);
/* linux/mm/page_io.c */
diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c Thu Sep 12 20:24:39 2002
+++ b/mm/page_alloc.c Thu Sep 12 20:24:39 2002
@@ -345,8 +345,15 @@
classzone->need_balance = 1;
mb();
/* we're somewhat low on memory, failed to find what we needed */
- if (waitqueue_active(&kswapd_wait))
- wake_up_interruptible(&kswapd_wait);
+ for (i = 0; zones[i] != NULL; i++) {
+ struct zone *z = zones[i];
+
+ /* We don't want to go swapping on zones that aren't actually
+ * low. This accounts for "incremental min" from last loop */
+ if (z->free_pages <= z->pages_low &&
+ waitqueue_active(&z->kswapd_wait))
+ wake_up_interruptible(&z->kswapd_wait);
+ }
/* Go through the zonelist again, taking __GFP_HIGH into account */
min = 1UL << order;
@@ -874,6 +881,8 @@
for(i = 0; i < zone->wait_table_size; ++i)
init_waitqueue_head(zone->wait_table + i);
+ init_waitqueue_head(&zone->kswapd_wait);
+
pgdat->nr_zones = j+1;
mask = (realsize / zone_balance_ratio[j]);
diff -Nru a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c Thu Sep 12 20:24:39 2002
+++ b/mm/vmscan.c Thu Sep 12 20:24:39 2002
@@ -713,8 +713,6 @@
return 0;
}
-DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
-
static int check_classzone_need_balance(struct zone *classzone)
{
struct zone *first_classzone;
@@ -728,71 +726,33 @@
return 1;
}
-static int kswapd_balance_pgdat(pg_data_t * pgdat)
+static int kswapd_balance_zone(struct zone *zone)
{
- int need_more_balance = 0, i;
- struct zone *zone;
-
- for (i = pgdat->nr_zones-1; i >= 0; i--) {
- zone = pgdat->node_zones + i;
+ int need_more_balance = 0;
+
+ do {
cond_resched();
if (!zone->need_balance)
- continue;
+ break;
if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
zone->need_balance = 0;
__set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
- continue;
+ break;
}
if (check_classzone_need_balance(zone))
need_more_balance = 1;
else
zone->need_balance = 0;
- }
-
- return need_more_balance;
-}
-
-static void kswapd_balance(void)
-{
- int need_more_balance;
- pg_data_t * pgdat;
-
- do {
- need_more_balance = 0;
- pgdat = pgdat_list;
- do
- need_more_balance |= kswapd_balance_pgdat(pgdat);
- while ((pgdat = pgdat->pgdat_next));
} while (need_more_balance);
-}
-static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
-{
- struct zone *zone;
- int i;
-
- for (i = pgdat->nr_zones-1; i >= 0; i--) {
- zone = pgdat->node_zones + i;
- if (!zone->need_balance)
- continue;
- return 0;
- }
-
- return 1;
+ return 0;
}
-static int kswapd_can_sleep(void)
+static int kswapd_can_sleep_zone(struct zone *zone)
{
- pg_data_t * pgdat;
-
- pgdat = pgdat_list;
- do {
- if (kswapd_can_sleep_pgdat(pgdat))
- continue;
- return 0;
- } while ((pgdat = pgdat->pgdat_next));
-
+ if (zone->need_balance)
+ return 0;
return 1;
}
@@ -809,13 +769,18 @@
* If there are applications that are active memory-allocators
* (most normal use), this basically shouldn't matter.
*/
-int kswapd(void *unused)
+int kswapd_zone(void *p)
{
+ struct zone *zone = (struct zone *)p;
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
+
+ printk( "kswapd%d starting for %s\n",
+ zone - zone->zone_pgdat->node_zones,
+ zone->name);
daemonize();
- strcpy(tsk->comm, "kswapd");
+ sprintf(tsk->comm, "kswapd%d", zone - zone->zone_pgdat->node_zones);
sigfillset(&tsk->blocked);
/*
@@ -839,30 +804,37 @@
if (current->flags & PF_FREEZE)
refrigerator(PF_IOTHREAD);
__set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kswapd_wait, &wait);
+ add_wait_queue(&zone->kswapd_wait, &wait);
mb();
- if (kswapd_can_sleep())
+ if (kswapd_can_sleep_zone(zone))
schedule();
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&kswapd_wait, &wait);
+ remove_wait_queue(&zone->kswapd_wait, &wait);
/*
* If we actually get into a low-memory situation,
* the processes needing more memory will wake us
* up on a more timely basis.
*/
- kswapd_balance();
+ kswapd_balance_zone(zone);
blk_run_queues();
}
}
static int __init kswapd_init(void)
{
+ struct zone* zone;
+
printk("Starting kswapd\n");
swap_setup();
- kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
+ for_each_zone(zone)
+ if (zone->size)
+ kernel_thread(kswapd_zone,
+ zone,
+ CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
+
return 0;
}
next reply other threads:[~2002-09-13 3:30 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2002-09-13 3:33 Dave Hansen [this message]
2002-09-13 4:06 ` [PATCH] per-zone kswapd process Andrew Morton
2002-09-13 4:59 ` William Lee Irwin III
2002-09-13 5:10 ` Martin J. Bligh
[not found] ` <3D8232DE.9090000@us.ibm.com>
[not found] ` <3D823702.8E29AB4F@digeo.com>
[not found] ` <3D8251D6.3060704@us.ibm.com>
[not found] ` <3D82566B.EB2939D5@digeo.com>
2002-09-13 22:52 ` [PATCH] per-zone^Wnode " Dave Hansen
2002-09-13 23:24 ` Matthew Dobson
2002-09-13 23:29 ` Matthew Dobson
2002-09-13 23:46 ` William Lee Irwin III
2002-09-14 0:02 ` Andrew Morton
2002-09-14 0:12 ` William Lee Irwin III
2002-09-14 1:19 ` Andrew Morton
2002-09-13 5:46 ` [PATCH] per-zone " Andrew Morton
2002-09-13 5:38 ` Martin J. Bligh
2002-09-13 6:03 ` Andrew Morton
2002-09-13 13:05 ` Alan Cox
2002-09-13 21:30 ` William Lee Irwin III
2002-09-18 16:07 ` [PATCH] recognize MAP_LOCKED in mmap() call Hubertus Franke
2002-09-18 16:29 ` Andrew Morton
2002-09-16 5:44 ` [PATCH] per-zone kswapd process Daniel Phillips
2002-09-16 7:46 ` William Lee Irwin III
2002-09-16 15:12 ` Rik van Riel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3D815C8C.4050000@us.ibm.com \
--to=haveblue@us.ibm.com \
--cc=Martin.Bligh@us.ibm.com \
--cc=akpm@zip.com.au \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=wli@holomorphy.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).