From: Mel Gorman <mgorman@techsingularity.net>
To: Linux-MM <linux-mm@kvack.org>
Cc: Rik van Riel <riel@surriel.com>, Vlastimil Babka <vbabka@suse.cz>,
Johannes Weiner <hannes@cmpxchg.org>,
LKML <linux-kernel@vger.kernel.org>,
Mel Gorman <mgorman@techsingularity.net>
Subject: [PATCH 14/27] mm, workingset: Make working set detection node-aware
Date: Wed, 6 Apr 2016 12:20:13 +0100 [thread overview]
Message-ID: <1459941626-3290-15-git-send-email-mgorman@techsingularity.net> (raw)
In-Reply-To: <1459941626-3290-1-git-send-email-mgorman@techsingularity.net>
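Working set and refault detection are still zone-based. Make them node-based: the WORKINGSET_* counters move from the zone stats to the node stats, and shadow entries now identify the node rather than the zone.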
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
include/linux/mmzone.h | 6 +++---
include/linux/vmstat.h | 1 -
mm/vmstat.c | 20 +++-----------------
mm/workingset.c | 39 ++++++++++++++++++---------------------
4 files changed, 24 insertions(+), 42 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dc09acedc88d..ace553a21547 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -146,9 +146,6 @@ enum zone_stat_item {
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
#endif
- WORKINGSET_REFAULT,
- WORKINGSET_ACTIVATE,
- WORKINGSET_NODERECLAIM,
NR_ANON_TRANSPARENT_HUGEPAGES,
NR_FREE_CMA_PAGES,
NR_VM_ZONE_STAT_ITEMS };
@@ -163,6 +160,9 @@ enum node_stat_item {
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
NR_PAGES_SCANNED, /* pages scanned since last reclaim */
+ WORKINGSET_REFAULT,
+ WORKINGSET_ACTIVATE,
+ WORKINGSET_NODERECLAIM,
NR_VM_NODE_STAT_ITEMS
};
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 8a43f7b80c20..ea00884ac8a0 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -232,7 +232,6 @@ void mod_node_page_state(struct pglist_data *, enum node_stat_item, long);
void inc_node_page_state(struct page *, enum node_stat_item);
void dec_node_page_state(struct page *, enum node_stat_item);
-extern void inc_zone_state(struct zone *, enum zone_stat_item);
extern void inc_node_state(struct pglist_data *, enum node_stat_item);
extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void __inc_node_state(struct pglist_data *, enum node_stat_item);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 5c1b0e1108d6..80a75f8ede5b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -433,11 +433,6 @@ void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
}
EXPORT_SYMBOL(mod_zone_page_state);
-void inc_zone_state(struct zone *zone, enum zone_stat_item item)
-{
- mod_zone_state(zone, item, 1, 1);
-}
-
void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
mod_zone_state(page_zone(page), item, 1, 1);
@@ -526,15 +521,6 @@ void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
}
EXPORT_SYMBOL(mod_zone_page_state);
-void inc_zone_state(struct zone *zone, enum zone_stat_item item)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __inc_zone_state(zone, item);
- local_irq_restore(flags);
-}
-
void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
unsigned long flags;
@@ -979,9 +965,6 @@ const char * const vmstat_text[] = {
"numa_local",
"numa_other",
#endif
- "workingset_refault",
- "workingset_activate",
- "workingset_nodereclaim",
"nr_anon_transparent_hugepages",
"nr_free_cma",
@@ -994,6 +977,9 @@ const char * const vmstat_text[] = {
"nr_isolated_anon",
"nr_isolated_file",
"nr_pages_scanned",
+ "workingset_refault",
+ "workingset_activate",
+ "workingset_nodereclaim",
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
diff --git a/mm/workingset.c b/mm/workingset.c
index d06d69670b5d..48596c7a910e 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -16,7 +16,7 @@
/*
* Double CLOCK lists
*
- * Per zone, two clock lists are maintained for file pages: the
+ * Per node, two clock lists are maintained for file pages: the
* inactive and the active list. Freshly faulted pages start out at
* the head of the inactive list and page reclaim scans pages from the
* tail. Pages that are accessed multiple times on the inactive list
@@ -141,11 +141,11 @@
*
* Implementation
*
- * For each zone's file LRU lists, a counter for inactive evictions
- * and activations is maintained (zone->inactive_age).
+ * For each node's file LRU lists, a counter for inactive evictions
+ * and activations is maintained (node->inactive_age).
*
* On eviction, a snapshot of this counter (along with some bits to
- * identify the zone) is stored in the now empty page cache radix tree
+ * identify the node) is stored in the now empty page cache radix tree
* slot of the evicted page. This is called a shadow entry.
*
* On cache misses for which there are shadow entries, an eligible
@@ -153,7 +153,7 @@
*/
#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \
- ZONES_SHIFT + NODES_SHIFT + \
+ NODES_SHIFT + \
MEM_CGROUP_ID_SHIFT)
#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)
@@ -167,33 +167,30 @@
*/
static unsigned int bucket_order __read_mostly;
-static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction)
+static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
{
eviction >>= bucket_order;
eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
- eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone);
- eviction = (eviction << ZONES_SHIFT) | zone_idx(zone);
+ eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
}
-static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep,
+static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
unsigned long *evictionp)
{
unsigned long entry = (unsigned long)shadow;
- int memcgid, nid, zid;
+ int memcgid, nid;
entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
- zid = entry & ((1UL << ZONES_SHIFT) - 1);
- entry >>= ZONES_SHIFT;
nid = entry & ((1UL << NODES_SHIFT) - 1);
entry >>= NODES_SHIFT;
memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
entry >>= MEM_CGROUP_ID_SHIFT;
*memcgidp = memcgid;
- *zonep = NODE_DATA(nid)->node_zones + zid;
+ *pgdat = NODE_DATA(nid);
*evictionp = entry << bucket_order;
}
@@ -220,7 +217,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg);
eviction = atomic_long_inc_return(&lruvec->inactive_age);
- return pack_shadow(memcgid, zone, eviction);
+ return pack_shadow(memcgid, zone->zone_pgdat, eviction);
}
/**
@@ -228,7 +225,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
* @shadow: shadow entry of the evicted page
*
* Calculates and evaluates the refault distance of the previously
- * evicted page in the context of the zone it was allocated in.
+ * evicted page in the context of the node it was allocated in.
*
* Returns %true if the page should be activated, %false otherwise.
*/
@@ -240,10 +237,10 @@ bool workingset_refault(void *shadow)
unsigned long eviction;
struct lruvec *lruvec;
unsigned long refault;
- struct zone *zone;
+ struct pglist_data *pgdat;
int memcgid;
- unpack_shadow(shadow, &memcgid, &zone, &eviction);
+ unpack_shadow(shadow, &memcgid, &pgdat, &eviction);
rcu_read_lock();
/*
@@ -267,7 +264,7 @@ bool workingset_refault(void *shadow)
rcu_read_unlock();
return false;
}
- lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg);
+ lruvec = mem_cgroup_lruvec(pgdat, memcg);
refault = atomic_long_read(&lruvec->inactive_age);
active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
rcu_read_unlock();
@@ -290,10 +287,10 @@ bool workingset_refault(void *shadow)
*/
refault_distance = (refault - eviction) & EVICTION_MASK;
- inc_zone_state(zone, WORKINGSET_REFAULT);
+ inc_node_state(pgdat, WORKINGSET_REFAULT);
if (refault_distance <= active_file) {
- inc_zone_state(zone, WORKINGSET_ACTIVATE);
+ inc_node_state(pgdat, WORKINGSET_ACTIVATE);
return true;
}
return false;
@@ -435,7 +432,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
}
}
BUG_ON(node->count);
- inc_zone_state(page_zone(virt_to_page(node)), WORKINGSET_NODERECLAIM);
+ inc_node_state(page_zone(virt_to_page(node))->zone_pgdat, WORKINGSET_NODERECLAIM);
if (!__radix_tree_delete_node(&mapping->page_tree, node))
BUG();
--
2.6.4
next prev parent reply other threads:[~2016-04-06 11:23 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-04-06 11:19 [RFC PATCH 00/27] Move LRU page reclaim from zones to nodes v3 Mel Gorman
2016-04-06 11:20 ` [PATCH 01/27] mm, page_alloc: Use ac->classzone_idx instead of zone_idx(preferred_zone) Mel Gorman
2016-04-06 11:20 ` [PATCH 02/27] mm, vmstat: Add infrastructure for per-node vmstats Mel Gorman
2016-04-06 11:20 ` [PATCH 03/27] mm, vmscan: Move lru_lock to the node Mel Gorman
2016-04-06 11:20 ` [PATCH 04/27] mm, vmscan: Move LRU lists to node Mel Gorman
2016-04-06 11:20 ` [PATCH 05/27] mm, vmscan: Begin reclaiming pages on a per-node basis Mel Gorman
2016-04-06 11:20 ` [PATCH 06/27] mm, vmscan: Have kswapd only scan based on the highest requested zone Mel Gorman
2016-04-06 11:20 ` [PATCH 07/27] mm, vmscan: Make kswapd reclaim in terms of nodes Mel Gorman
2016-04-06 11:20 ` [PATCH 08/27] mm, vmscan: Simplify the logic deciding whether kswapd sleeps Mel Gorman
2016-04-06 11:20 ` [PATCH 09/27] mm, vmscan: By default have direct reclaim only shrink once per node Mel Gorman
2016-04-06 11:20 ` [PATCH 10/27] mm, vmscan: Clear congestion, dirty and need for compaction on a per-node basis Mel Gorman
2016-04-06 11:20 ` [PATCH 11/27] mm: vmscan: Do not reclaim from kswapd if there is any eligible zone Mel Gorman
2016-04-06 11:20 ` [PATCH 12/27] mm, vmscan: Make shrink_node decisions more node-centric Mel Gorman
2016-04-06 11:20 ` [PATCH 13/27] mm, memcg: Move memcg limit enforcement from zones to nodes Mel Gorman
2016-04-06 11:20 ` Mel Gorman [this message]
2016-04-06 11:20 ` [PATCH 15/27] mm, page_alloc: Consider dirtyable memory in terms of nodes Mel Gorman
2016-04-06 11:20 ` [PATCH 16/27] mm: Move page mapped accounting to the node Mel Gorman
2016-04-06 11:20 ` [PATCH 17/27] mm: Rename NR_ANON_PAGES to NR_ANON_MAPPED Mel Gorman
2016-04-06 11:20 ` [PATCH 18/27] mm: Move most file-based accounting to the node Mel Gorman
2016-04-06 11:20 ` [PATCH 19/27] mm: Move vmscan writes and file write " Mel Gorman
2016-04-06 11:20 ` [PATCH 20/27] mm, vmscan: Update classzone_idx if buffer_heads_over_limit Mel Gorman
2016-04-06 11:20 ` [PATCH 21/27] mm, vmscan: Only wakeup kswapd once per node for the requested classzone Mel Gorman
2016-04-15 9:13 [PATCH 00/27] Move LRU page reclaim from zones to nodes v5 Mel Gorman
2016-04-15 9:13 ` [PATCH 14/27] mm, workingset: Make working set detection node-aware Mel Gorman
2016-06-09 18:04 [PATCH 00/27] Move LRU page reclaim from zones to nodes v6 Mel Gorman
2016-06-09 18:04 ` [PATCH 14/27] mm, workingset: Make working set detection node-aware Mel Gorman
2016-06-16 15:13 ` Vlastimil Babka
2016-06-16 15:56 ` Mel Gorman
2016-06-21 14:15 [PATCH 00/27] Move LRU page reclaim from zones to nodes v7 Mel Gorman
2016-06-21 14:15 ` [PATCH 14/27] mm, workingset: Make working set detection node-aware Mel Gorman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1459941626-3290-15-git-send-email-mgorman@techsingularity.net \
--to=mgorman@techsingularity.net \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=riel@surriel.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).