From: Mel Gorman <mgorman@techsingularity.net> To: Andrew Morton <akpm@linux-foundation.org>, Linux-MM <linux-mm@kvack.org> Cc: Rik van Riel <riel@surriel.com>, Vlastimil Babka <vbabka@suse.cz>, Johannes Weiner <hannes@cmpxchg.org>, LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@techsingularity.net> Subject: [PATCH 12/27] mm, vmscan: Make shrink_node decisions more node-centric Date: Thu, 9 Jun 2016 19:04:28 +0100 [thread overview] Message-ID: <1465495483-11855-13-git-send-email-mgorman@techsingularity.net> (raw) In-Reply-To: <1465495483-11855-1-git-send-email-mgorman@techsingularity.net> Earlier patches focused on having direct reclaim and kswapd use data that is node-centric for reclaiming but shrink_node() itself still uses too much zone information. This patch removes unnecessary zone-based information with the most important decision being whether to continue reclaim or not. Some memcg APIs are adjusted as a result even though memcg itself still uses some zone information. Signed-off-by: Mel Gorman <mgorman@techsingularity.net> --- include/linux/memcontrol.h | 9 +++---- include/linux/mmzone.h | 4 ++-- include/linux/swap.h | 2 +- mm/memcontrol.c | 17 +++++++------- mm/page_alloc.c | 2 +- mm/vmscan.c | 58 ++++++++++++++++++++++++++-------------------- mm/workingset.c | 6 ++--- 7 files changed, 54 insertions(+), 44 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9adb1bebb487..beee5be5d5d5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -306,7 +306,8 @@ void mem_cgroup_uncharge_list(struct list_head *page_list); void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); -struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); +struct lruvec *mem_cgroup_lruvec(struct pglist_data *, struct zone *zone, + struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); @@ -573,10 +574,10 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new) { } -static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, - struct mem_cgroup *memcg) +static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, + struct zone *zone, struct mem_cgroup *memcg) { - return zone_lruvec(zone); + return node_lruvec(pgdat); } static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 15ce454d0d59..16da23c095a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -735,9 +735,9 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone) return &zone->zone_pgdat->lru_lock; } -static inline struct lruvec *zone_lruvec(struct zone *zone) +static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) { - return &zone->zone_pgdat->lruvec; + return &pgdat->lruvec; } static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) diff --git a/include/linux/swap.h b/include/linux/swap.h index cc5c958c050b..575efae79a66 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -324,7 +324,7 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, bool may_swap); -extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, +extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, struct zone *zone, unsigned long *nr_scanned); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 12f796e52e5e..cda2342af10e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -944,22 +944,23 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) iter = mem_cgroup_iter(NULL, iter, NULL)) /** - * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg + * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone + * @node: node of the wanted lruvec * @zone: zone of the wanted lruvec * @memcg: memcg of the wanted lruvec * - * Returns the lru list vector holding pages for the given @zone and - * @mem. This can be the global zone lruvec, if the memory controller + * Returns the lru list vector holding pages for a given @node or a given + * @memcg and @zone. This can be the node lruvec, if the memory controller * is disabled. */ -struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, - struct mem_cgroup *memcg) +struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, + struct zone *zone, struct mem_cgroup *memcg) { struct mem_cgroup_per_zone *mz; struct lruvec *lruvec; if (mem_cgroup_disabled()) { - lruvec = zone_lruvec(zone); + lruvec = node_lruvec(pgdat); goto out; } @@ -1473,8 +1474,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, } continue; } - total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, - zone, &nr_scanned); + total += mem_cgroup_shrink_node(victim, gfp_mask, false, + zone, &nr_scanned); *total_scanned += nr_scanned; if (!soft_limit_excess(root_memcg)) break; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d8cb483d5cad..fc5bad895732 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5905,6 +5905,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) init_waitqueue_head(&pgdat->kcompactd_wait); #endif pgdat_page_ext_init(pgdat); + lruvec_init(node_lruvec(pgdat)); for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; @@ -5968,7 +5969,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) /* For bootup, initialized properly in watermark setup */ mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages); - lruvec_init(zone_lruvec(zone)); if (!size) continue; diff --git a/mm/vmscan.c b/mm/vmscan.c index 6663fc75c3bc..93523944abb0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2196,10 +2196,11 @@ static inline void init_tlb_ubc(void) /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg, +static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg, struct scan_control *sc, unsigned long *lru_pages) { - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + struct zone *zone = &pgdat->node_zones[sc->reclaim_idx]; + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg); unsigned long nr[NR_LRU_LISTS]; unsigned long targets[NR_LRU_LISTS]; unsigned long nr_to_scan; @@ -2332,13 +2333,14 @@ static bool in_reclaim_compaction(struct scan_control *sc) * calls try_to_compact_zone() that it will have enough free pages to succeed. * It will give up earlier than that if there is difficulty reclaiming pages. */ -static inline bool should_continue_reclaim(struct zone *zone, +static inline bool should_continue_reclaim(struct pglist_data *pgdat, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; + int z; /* If not in reclaim/compaction mode, stop */ if (!in_reclaim_compaction(sc)) @@ -2372,21 +2374,27 @@ static inline bool should_continue_reclaim(struct zone *zone, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE); + inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); if (get_nr_swap_pages() > 0) - inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON); + inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(zone, sc->order, 0, 0)) { - case COMPACT_PARTIAL: - case COMPACT_CONTINUE: - return false; - default: - return true; + for (z = 0; z <= sc->reclaim_idx; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + switch (compaction_suitable(zone, sc->order, 0, 0)) { + case COMPACT_PARTIAL: + case COMPACT_CONTINUE: + return false; + default: + /* check next zone */ + ; + } } + return true; } static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, @@ -2395,15 +2403,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long nr_reclaimed, nr_scanned; bool reclaimable = false; - struct zone *zone = &pgdat->node_zones[classzone_idx]; do { struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { - .zone = zone, + .zone = &pgdat->node_zones[classzone_idx], .priority = sc->priority, }; - unsigned long zone_lru_pages = 0; + unsigned long node_lru_pages = 0; struct mem_cgroup *memcg; nr_reclaimed = sc->nr_reclaimed; @@ -2424,11 +2431,11 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, reclaimed = sc->nr_reclaimed; scanned = sc->nr_scanned; - shrink_zone_memcg(zone, memcg, sc, &lru_pages); - zone_lru_pages += lru_pages; + shrink_node_memcg(pgdat, memcg, sc, &lru_pages); + node_lru_pages += lru_pages; if (!global_reclaim(sc) && sc->reclaim_idx == classzone_idx) - shrink_slab(sc->gfp_mask, zone_to_nid(zone), + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->nr_scanned - scanned, lru_pages); @@ -2440,7 +2447,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, /* * Direct reclaim and kswapd have to scan all memory * cgroups to fulfill the overall scan target for the - * zone. + * node. * * Limit reclaim, on the other hand, only cares about * nr_to_reclaim pages to be reclaimed and it will @@ -2459,9 +2466,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, * the eligible LRU pages were scanned. */ if (global_reclaim(sc) && sc->reclaim_idx == classzone_idx) - shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL, + shrink_slab(sc->gfp_mask, pgdat->node_id, NULL, sc->nr_scanned - nr_scanned, - zone_lru_pages); + node_lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; @@ -2476,7 +2483,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, if (sc->nr_reclaimed - nr_reclaimed) reclaimable = true; - } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, + } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); return reclaimable; @@ -2866,7 +2873,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_MEMCG -unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, +unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, struct zone *zone, unsigned long *nr_scanned) @@ -2876,6 +2883,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .target_mem_cgroup = memcg, .may_writepage = !laptop_mode, .may_unmap = 1, + .reclaim_idx = zone_idx(zone), .may_swap = !noswap, }; unsigned long lru_pages; @@ -2890,11 +2898,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, /* * NOTE: Although we can get the priority field, using it * here is not a good idea, since it limits the pages we can scan. - * if we don't reclaim here, the shrink_zone from balance_pgdat + * if we don't reclaim here, the shrink_node from balance_pgdat * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_zone_memcg(zone, memcg, &sc, &lru_pages); + shrink_node_memcg(zone->zone_pgdat, memcg, &sc, &lru_pages); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -2952,7 +2960,7 @@ static void age_active_anon(struct pglist_data *pgdat, memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg); if (inactive_list_is_low(lruvec, false)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, diff --git a/mm/workingset.c b/mm/workingset.c index c0820e06aaff..10ddf707782a 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -218,7 +218,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page) VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); - lruvec = mem_cgroup_zone_lruvec(zone, memcg); + lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg); eviction = atomic_long_inc_return(&lruvec->inactive_age); return pack_shadow(memcgid, zone, eviction); } @@ -267,7 +267,7 @@ bool workingset_refault(void *shadow) rcu_read_unlock(); return false; } - lruvec = mem_cgroup_zone_lruvec(zone, memcg); + lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg); refault = atomic_long_read(&lruvec->inactive_age); active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); rcu_read_unlock(); @@ -317,7 +317,7 @@ void workingset_activation(struct page *page) */ if (!mem_cgroup_disabled() && !page_memcg(page)) goto out; - lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page)); + lruvec = mem_cgroup_lruvec(page_zone(page)->zone_pgdat, page_zone(page), page_memcg(page)); atomic_long_inc(&lruvec->inactive_age); out: unlock_page_memcg(page); -- 2.6.4
next prev parent reply other threads:[~2016-06-09 18:07 UTC|newest] Thread overview: 75+ messages / expand[flat|nested] mbox.gz Atom feed top 2016-06-09 18:04 [PATCH 00/27] Move LRU page reclaim from zones to nodes v6 Mel Gorman 2016-06-09 18:04 ` [PATCH 01/27] mm, vmstat: Add infrastructure for per-node vmstats Mel Gorman 2016-06-10 13:31 ` Vlastimil Babka 2016-06-10 13:47 ` Mel Gorman 2016-06-13 17:26 ` Christoph Lameter 2016-06-14 14:25 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 02/27] mm, vmscan: Move lru_lock to the node Mel Gorman 2016-06-10 16:39 ` Vlastimil Babka 2016-06-10 17:48 ` Peter Zijlstra 2016-06-14 14:41 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 03/27] mm, vmscan: Move LRU lists to node Mel Gorman 2016-06-10 18:00 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 04/27] mm, vmscan: Begin reclaiming pages on a per-node basis Mel Gorman 2016-06-15 12:52 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 05/27] mm, vmscan: Have kswapd only scan based on the highest requested zone Mel Gorman 2016-06-15 13:13 ` Vlastimil Babka 2016-06-16 7:44 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 06/27] mm, vmscan: Make kswapd reclaim in terms of nodes Mel Gorman 2016-06-15 14:23 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 07/27] mm, vmscan: Remove balance gap Mel Gorman 2016-06-15 14:28 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 08/27] mm, vmscan: Simplify the logic deciding whether kswapd sleeps Mel Gorman 2016-06-15 15:18 ` Vlastimil Babka 2016-06-16 8:30 ` Mel Gorman 2016-06-16 9:16 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 09/27] mm, vmscan: By default have direct reclaim only shrink once per node Mel Gorman 2016-06-16 8:59 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 10/27] mm, vmscan: Clear congestion, dirty and need for compaction on a per-node basis Mel Gorman 2016-06-16 9:29 ` Vlastimil Babka 2016-06-16 10:29 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 11/27] mm: vmscan: Do not reclaim from kswapd if there is any eligible zone Mel Gorman 2016-06-16 10:08 ` Vlastimil Babka 2016-06-09 18:04 ` Mel Gorman [this message] 2016-06-16 13:35 ` [PATCH 12/27] mm, vmscan: Make shrink_node decisions more node-centric Vlastimil Babka 2016-06-16 14:47 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 13/27] mm, memcg: Move memcg limit enforcement from zones to nodes Mel Gorman 2016-06-16 15:06 ` Vlastimil Babka 2016-06-16 15:53 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 14/27] mm, workingset: Make working set detection node-aware Mel Gorman 2016-06-16 15:13 ` Vlastimil Babka 2016-06-16 15:56 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 15/27] mm, page_alloc: Consider dirtyable memory in terms of nodes Mel Gorman 2016-06-16 15:45 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 16/27] mm: Move page mapped accounting to the node Mel Gorman 2016-06-16 15:52 ` Vlastimil Babka 2016-06-16 16:04 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 17/27] mm: Rename NR_ANON_PAGES to NR_ANON_MAPPED Mel Gorman 2016-06-17 8:28 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 18/27] mm: Move most file-based accounting to the node Mel Gorman 2016-06-17 8:35 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 19/27] mm: Move vmscan writes and file write " Mel Gorman 2016-06-17 8:51 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 20/27] mm, vmscan: Update classzone_idx if buffer_heads_over_limit Mel Gorman 2016-06-17 8:53 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 21/27] mm, vmscan: Only wakeup kswapd once per node for the requested classzone Mel Gorman 2016-06-17 10:46 ` Vlastimil Babka 2016-06-17 12:03 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 22/27] mm: Convert zone_reclaim to node_reclaim Mel Gorman 2016-06-17 10:55 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 23/27] mm, vmscan: Add classzone information to tracepoints Mel Gorman 2016-06-17 10:57 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 24/27] mm, page_alloc: Remove fair zone allocation policy Mel Gorman 2016-06-17 11:27 ` Vlastimil Babka 2016-06-17 12:07 ` Mel Gorman 2016-06-09 18:04 ` [PATCH 25/27] mm: page_alloc: Cache the last node whose dirty limit is reached Mel Gorman 2016-06-17 11:31 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 26/27] mm: vmstat: Replace __count_zone_vm_events with a zone id equivalent Mel Gorman 2016-06-17 11:36 ` Vlastimil Babka 2016-06-09 18:04 ` [PATCH 27/27] mm: vmstat: Account per-zone stalls and pages skipped during reclaim Mel Gorman 2016-06-17 12:39 ` Vlastimil Babka -- strict thread matches above, loose matches on Subject: below -- 2016-06-21 14:15 [PATCH 00/27] Move LRU page reclaim from zones to nodes v7 Mel Gorman 2016-06-21 14:15 ` [PATCH 12/27] mm, vmscan: Make shrink_node decisions more node-centric Mel Gorman 2016-06-22 13:20 ` Michal Hocko 2016-06-22 15:42 ` Vlastimil Babka 2016-04-15 9:13 [PATCH 00/27] Move LRU page reclaim from zones to nodes v5 Mel Gorman 2016-04-15 9:13 ` [PATCH 12/27] mm, vmscan: Make shrink_node decisions more node-centric Mel Gorman 2016-04-06 11:19 [RFC PATCH 00/27] Move LRU page reclaim from zones to nodes v3 Mel Gorman 2016-04-06 11:20 ` [PATCH 12/27] mm, vmscan: Make shrink_node decisions more node-centric Mel Gorman
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1465495483-11855-13-git-send-email-mgorman@techsingularity.net \ --to=mgorman@techsingularity.net \ --cc=akpm@linux-foundation.org \ --cc=hannes@cmpxchg.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=riel@surriel.com \ --cc=vbabka@suse.cz \ --subject='Re: [PATCH 12/27] mm, vmscan: Make shrink_node decisions more node-centric' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).