From: Pavel Emelianov <xemul@sw.ru>
To: Andrew Morton <akpm@osdl.org>, Paul Menage <menage@google.com>,
Srivatsa Vaddagiri <vatsa@in.ibm.com>,
Balbir Singh <balbir@in.ibm.com>
Cc: devel@openvz.org,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Kirill Korotaev <dev@sw.ru>,
Chandra Seetharaman <sekharan@us.ibm.com>,
Cedric Le Goater <clg@fr.ibm.com>,
"Eric W. Biederman" <ebiederm@xmission.com>,
Rohit Seth <rohitseth@google.com>,
Linux Containers <containers@lists.osdl.org>
Subject: [PATCH 7/8] Page scanner changes needed to implement per-container scanner
Date: Mon, 09 Apr 2007 17:00:53 +0400 [thread overview]
Message-ID: <461A3905.1070700@sw.ru> (raw)
In-Reply-To: <461A3010.90403@sw.ru>
[-- Attachment #1: Type: text/plain, Size: 159 bytes --]
Struct scan_control now carries:
* the RSS container to free pages in;
* pointer to an isolate_pages() function to isolate
pages needed for the reclamation.
[-- Attachment #2: diff-rss-container-reclaim-core --]
[-- Type: text/plain, Size: 5755 bytes --]
diff -upr linux-2.6.20.orig/mm/vmscan.c linux-2.6.20-2/mm/vmscan.c
--- linux-2.6.20.orig/mm/vmscan.c 2007-03-06 19:09:50.000000000 +0300
+++ linux-2.6.20-2/mm/vmscan.c 2007-04-09 11:26:06.000000000 +0400
@@ -45,6 +45,8 @@
#include "internal.h"
+#include <linux/rss_container.h>
+
struct scan_control {
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
@@ -66,6 +68,10 @@ struct scan_control {
int swappiness;
int all_unreclaimable;
+ struct rss_container *cnt;
+ unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
+ unsigned long *scanned, struct zone *zone,
+ struct rss_container *cont, int active);
};
/*
@@ -654,6 +660,17 @@ static unsigned long isolate_lru_pages(u
return nr_taken;
}
+static unsigned long isolate_pages_global(unsigned long nr,
+ struct list_head *dst, unsigned long *scanned,
+ struct zone *z, struct rss_container *cont,
+ int active)
+{
+ if (active)
+ return isolate_lru_pages(nr, &z->active_list, dst, scanned);
+ else
+ return isolate_lru_pages(nr, &z->inactive_list, dst, scanned);
+}
+
/*
* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
* of reclaimed pages
@@ -676,9 +693,9 @@ static unsigned long shrink_inactive_lis
unsigned long nr_scan;
unsigned long nr_freed;
- nr_taken = isolate_lru_pages(sc->swap_cluster_max,
- &zone->inactive_list,
- &page_list, &nr_scan);
+ nr_taken = sc->isolate_pages(sc->swap_cluster_max, &page_list,
+ &nr_scan, zone, sc->cnt, 0);
+
zone->nr_inactive -= nr_taken;
zone->pages_scanned += nr_scan;
spin_unlock_irq(&zone->lru_lock);
@@ -822,8 +839,8 @@ force_reclaim_mapped:
lru_add_drain();
spin_lock_irq(&zone->lru_lock);
- pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
- &l_hold, &pgscanned);
+ pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, zone,
+ sc->cnt, 1);
zone->pages_scanned += pgscanned;
zone->nr_active -= pgmoved;
spin_unlock_irq(&zone->lru_lock);
@@ -1012,7 +1031,9 @@ static unsigned long shrink_zones(int pr
* holds filesystem locks which prevent writeout this might not work, and the
* allocation attempt will fail.
*/
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+
+static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
+ struct scan_control *sc)
{
int priority;
int ret = 0;
@@ -1021,13 +1042,6 @@ unsigned long try_to_free_pages(struct z
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long lru_pages = 0;
int i;
- struct scan_control sc = {
- .gfp_mask = gfp_mask,
- .may_writepage = !laptop_mode,
- .swap_cluster_max = SWAP_CLUSTER_MAX,
- .may_swap = 1,
- .swappiness = vm_swappiness,
- };
count_vm_event(ALLOCSTALL);
@@ -1041,17 +1055,18 @@ unsigned long try_to_free_pages(struct z
}
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
- sc.nr_scanned = 0;
+ sc->nr_scanned = 0;
if (!priority)
disable_swap_token();
- nr_reclaimed += shrink_zones(priority, zones, &sc);
- shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
+ nr_reclaimed += shrink_zones(priority, zones, sc);
+ if (sc->cnt == NULL)
+ shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
if (reclaim_state) {
nr_reclaimed += reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
}
- total_scanned += sc.nr_scanned;
- if (nr_reclaimed >= sc.swap_cluster_max) {
+ total_scanned += sc->nr_scanned;
+ if (nr_reclaimed >= sc->swap_cluster_max) {
ret = 1;
goto out;
}
@@ -1063,18 +1078,18 @@ unsigned long try_to_free_pages(struct z
* that's undesirable in laptop mode, where we *want* lumpy
* writeout. So in laptop mode, write out the whole world.
*/
- if (total_scanned > sc.swap_cluster_max +
- sc.swap_cluster_max / 2) {
+ if (total_scanned > sc->swap_cluster_max +
+ sc->swap_cluster_max / 2) {
wakeup_pdflush(laptop_mode ? 0 : total_scanned);
- sc.may_writepage = 1;
+ sc->may_writepage = 1;
}
/* Take a nap, wait for some writeback to complete */
- if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
+ if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ/10);
}
/* top priority shrink_caches still had more to do? don't OOM, then */
- if (!sc.all_unreclaimable)
+ if (!sc->all_unreclaimable)
ret = 1;
out:
/*
@@ -1097,6 +1112,21 @@ out:
return ret;
}
+unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+{
+ struct scan_control sc = {
+ .gfp_mask = gfp_mask,
+ .may_writepage = !laptop_mode,
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
+ .may_swap = 1,
+ .swappiness = vm_swappiness,
+ .cnt = NULL,
+ .isolate_pages = isolate_pages_global,
+ };
+
+ return do_try_to_free_pages(zones, gfp_mask, &sc);
+}
+
/*
* For kswapd, balance_pgdat() will work across all this node's zones until
* they are all at pages_high.
@@ -1131,6 +1190,8 @@ static unsigned long balance_pgdat(pg_da
.may_swap = 1,
.swap_cluster_max = SWAP_CLUSTER_MAX,
.swappiness = vm_swappiness,
+ .cnt = NULL,
+ .isolate_pages = isolate_pages_global,
};
/*
* temp_priority is used to remember the scanning priority at which
@@ -1436,6 +1497,8 @@ unsigned long shrink_all_memory(unsigned
.swap_cluster_max = nr_pages,
.may_writepage = 1,
.swappiness = vm_swappiness,
+ .cnt = NULL,
+ .isolate_pages = isolate_pages_global,
};
current->reclaim_state = &reclaim_state;
@@ -1619,6 +1682,8 @@ static int __zone_reclaim(struct zone *z
SWAP_CLUSTER_MAX),
.gfp_mask = gfp_mask,
.swappiness = vm_swappiness,
+ .cnt = NULL,
+ .isolate_pages = isolate_pages_global,
};
unsigned long slab_reclaimable;
next prev parent reply other threads:[~2007-04-09 13:04 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-09 12:22 [PATCH 0/8] RSS controller based on process containers (v2) Pavel Emelianov
2007-04-09 12:35 ` [PATCH 1/8] Resource counters Pavel Emelianov
2007-04-09 12:41 ` [PATCH 2/8] Add container pointer on struct page Pavel Emelianov
2007-04-13 13:56 ` Jean-Pierre Dion
2007-04-13 14:52 ` Pavel Emelianov
2007-04-09 12:46 ` [PATCH 3/8] Add container pointer on mm_struct Pavel Emelianov
2007-04-09 12:49 ` [PATCH 4/8] RSS container core Pavel Emelianov
2007-04-09 12:54 ` [PATCH 5/8] RSS accounting hooks over the code Pavel Emelianov
2007-04-09 12:56 ` [PATCH 6/8] Per container OOM killer Pavel Emelianov
2007-04-09 13:00 ` Pavel Emelianov [this message]
2007-04-09 13:02 ` [PATCH 8/8] Per-container pages reclamation Pavel Emelianov
2007-04-24 9:47 ` Balbir Singh
2007-04-24 10:34 ` Pavel Emelianov
2007-04-24 11:01 ` Balbir Singh
2007-04-24 11:37 ` Pavel Emelianov
2007-05-02 9:51 ` Balbir Singh
2007-05-17 11:31 ` Balbir Singh
2007-05-21 15:15 ` Pavel Emelianov
2007-05-24 7:59 ` Balbir Singh
2007-04-09 15:54 ` [PATCH 0/8] RSS controller based on process containers (v2) Peter Zijlstra
2007-04-10 8:30 ` Pavel Emelianov
2007-04-19 5:37 ` Vaidyanathan Srinivasan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=461A3905.1070700@sw.ru \
--to=xemul@sw.ru \
--cc=akpm@osdl.org \
--cc=balbir@in.ibm.com \
--cc=clg@fr.ibm.com \
--cc=containers@lists.osdl.org \
--cc=dev@sw.ru \
--cc=devel@openvz.org \
--cc=ebiederm@xmission.com \
--cc=linux-kernel@vger.kernel.org \
--cc=menage@google.com \
--cc=rohitseth@google.com \
--cc=sekharan@us.ibm.com \
--cc=vatsa@in.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).