From: Mel Gorman <mgorman@techsingularity.net>
To: Andrew Morton <akpm@linux-foundation.org>, Linux-MM <linux-mm@kvack.org>
Cc: Rik van Riel <riel@surriel.com>, Vlastimil Babka <vbabka@suse.cz>,
        Johannes Weiner <hannes@cmpxchg.org>, LKML <linux-kernel@vger.kernel.org>,
        Mel Gorman <mgorman@techsingularity.net>
Subject: [PATCH 24/27] mm, page_alloc: Remove fair zone allocation policy
Date: Tue, 21 Jun 2016 15:16:03 +0100
Message-ID: <1466518566-30034-25-git-send-email-mgorman@techsingularity.net>
In-Reply-To: <1466518566-30034-1-git-send-email-mgorman@techsingularity.net>

The fair zone allocation policy interleaves allocation requests between
zones to avoid an age inversion problem whereby new pages are reclaimed
to balance a zone. Reclaim is now node-based so this should no longer be
an issue and the fair zone allocation policy is not free. This patch
removes it.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mmzone.h |  5 ----
 mm/internal.h          |  1 -
 mm/page_alloc.c        | 75 +------------------------------------------------
 mm/vmstat.c            |  4 +--
 4 files changed, 2 insertions(+), 83 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 79fb9f6efc55..2c331f0afd04 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -110,7 +110,6 @@ struct zone_padding {
 enum zone_stat_item {
         /* First 128 byte cacheline (assuming 64 bit words) */
         NR_FREE_PAGES,
-        NR_ALLOC_BATCH,
         NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
         NR_ZONE_LRU_ANON = NR_ZONE_LRU_BASE,
         NR_ZONE_LRU_FILE,
@@ -512,10 +511,6 @@ struct zone {
         atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
-enum zone_flags {
-        ZONE_FAIR_DEPLETED,             /* fair zone policy batch depleted */
-};
-
 enum pgdat_flags {
         PGDAT_CONGESTED,                /* zone has many dirty pages backed by
                                          * a congested BDI
diff --git a/mm/internal.h b/mm/internal.h
index 4abb2336e127..863b12ca7aa2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -469,7 +469,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HIGH              0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET            0x40 /* check for correct cpuset */
 #define ALLOC_CMA               0x80 /* allow allocations from CMA areas */
-#define ALLOC_FAIR              0x100 /* fair zone allocation */
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 78e5abc41857..1b5080475481 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2592,7 +2592,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
                 else
                         page = list_first_entry(list, struct page, lru);
 
-                __dec_zone_state(zone, NR_ALLOC_BATCH);
                 list_del(&page->lru);
                 pcp->count--;
 
@@ -2618,15 +2617,10 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
                 spin_unlock(&zone->lock);
                 if (!page)
                         goto failed;
-                __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
                 __mod_zone_freepage_state(zone, -(1 << order),
                                           get_pcppage_migratetype(page));
         }
 
-        if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
-            !test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
-                set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
-
         __count_zone_vm_events(PGALLOC, zone, 1 << order);
         zone_statistics(preferred_zone, zone, gfp_flags);
         local_irq_restore(flags);
@@ -2837,40 +2831,18 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
 }
 
 #ifdef CONFIG_NUMA
-static bool zone_local(struct zone *local_zone, struct zone *zone)
-{
-        return local_zone->node == zone->node;
-}
-
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
         return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
                                 RECLAIM_DISTANCE;
 }
 #else /* CONFIG_NUMA */
-static bool zone_local(struct zone *local_zone, struct zone *zone)
-{
-        return true;
-}
-
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
         return true;
 }
 #endif /* CONFIG_NUMA */
 
-static void reset_alloc_batches(struct zone *preferred_zone)
-{
-        struct zone *zone = preferred_zone->zone_pgdat->node_zones;
-
-        do {
-                mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                        high_wmark_pages(zone) - low_wmark_pages(zone) -
-                        atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-                clear_bit(ZONE_FAIR_DEPLETED, &zone->flags);
-        } while (zone++ != preferred_zone);
-}
-
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -2881,10 +2853,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 {
         struct zoneref *z = ac->preferred_zoneref;
         struct zone *zone;
-        bool fair_skipped = false;
-        bool apply_fair = (alloc_flags & ALLOC_FAIR);
-
-zonelist_scan:
         /*
          * Scan zonelist, looking for a zone with enough free.
          * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
@@ -2899,23 +2867,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                         !__cpuset_zone_allowed(zone, gfp_mask))
                                 continue;
                 /*
-                 * Distribute pages in proportion to the individual
-                 * zone size to ensure fair page aging.  The zone a
-                 * page was allocated in should have no effect on the
-                 * time the page has in memory before being reclaimed.
-                 */
-                if (apply_fair) {
-                        if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
-                                fair_skipped = true;
-                                continue;
-                        }
-                        if (!zone_local(ac->preferred_zoneref->zone, zone)) {
-                                if (fair_skipped)
-                                        goto reset_fair;
-                                apply_fair = false;
-                        }
-                }
-                /*
                  * When allocating a page cache page for writing, we
                  * want to get it from a node that is within its dirty
                  * limit, such that no single node holds more than its
@@ -2986,23 +2937,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                 }
         }
 
-        /*
-         * The first pass makes sure allocations are spread fairly within the
-         * local node.  However, the local node might have free pages left
-         * after the fairness batches are exhausted, and remote zones haven't
-         * even been considered yet.  Try once more without fairness, and
-         * include remote zones now, before entering the slowpath and waking
-         * kswapd: prefer spilling to a remote zone over swapping locally.
-         */
-        if (fair_skipped) {
-reset_fair:
-                apply_fair = false;
-                fair_skipped = false;
-                reset_alloc_batches(ac->preferred_zoneref->zone);
-                z = ac->preferred_zoneref;
-                goto zonelist_scan;
-        }
-
         return NULL;
 }
 
@@ -3753,7 +3687,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
         struct page *page;
         unsigned int cpuset_mems_cookie;
-        unsigned int alloc_flags = ALLOC_WMARK_LOW|ALLOC_FAIR;
+        unsigned int alloc_flags = ALLOC_WMARK_LOW;
         gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
         struct alloc_context ac = {
                 .high_zoneidx = gfp_zone(gfp_mask),
@@ -5954,9 +5888,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 zone_seqlock_init(zone);
                 zone_pcp_init(zone);
 
-                /* For bootup, initialized properly in watermark setup */
-                mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
-
                 if (!size)
                         continue;
 
@@ -6809,10 +6740,6 @@ static void __setup_per_zone_wmarks(void)
                 zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
                 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
 
-                __mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                        high_wmark_pages(zone) - low_wmark_pages(zone) -
-                        atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-
                 spin_unlock_irqrestore(&zone->lock, flags);
         }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b58ceb8c872c..ec96d20eeb52 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -920,7 +920,6 @@ int fragmentation_index(struct zone *zone, unsigned int order)
 const char * const vmstat_text[] = {
         /* enum zone_stat_item countes */
         "nr_free_pages",
-        "nr_alloc_batch",
         "nr_zone_anon_lru",
         "nr_zone_file_lru",
         "nr_mlock",
@@ -1626,10 +1625,9 @@ int vmstat_refresh(struct ctl_table *table, int write,
                 val = atomic_long_read(&vm_zone_stat[i]);
                 if (val < 0) {
                         switch (i) {
-                        case NR_ALLOC_BATCH:
                         case NR_PAGES_SCANNED:
                                 /*
-                                 * These are often seen to go negative in
+                                 * This is often seen to go negative in
                                  * recent kernels, but not to go permanently
                                  * negative.  Whilst it would be nicer not to
                                  * have exceptions, rooting them out would be
-- 
2.6.4
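For readers who want the removed mechanism in one place rather than spread
across the hunks above, the batch scheme works like this: every zone carries
an allocation batch sized roughly to high_wmark - low_wmark, each allocation
served from a zone decrements that batch, a zone whose batch reaches zero is
flagged depleted and skipped by the fair pass, and once the local zones are
all depleted the batches are refilled and the scan restarts. Below is a
condensed, userspace-only sketch of that behaviour, not kernel code: struct
fake_zone, the fixed batch numbers and the recursive retry are invented for
illustration, and only the shape of the logic (and the reset_alloc_batches()
name) mirrors what is visible in the diff.

/*
 * Illustrative stand-in for the batch-based fair zone allocation policy.
 * Build with: cc -o fairzone fairzone.c
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_ZONES 3

struct fake_zone {
        const char *name;
        long alloc_batch;       /* stands in for the NR_ALLOC_BATCH counter */
        bool fair_depleted;     /* stands in for the ZONE_FAIR_DEPLETED flag */
        long batch_size;        /* refill target, ~ high_wmark - low_wmark */
};

static struct fake_zone zones[NR_ZONES] = {
        { "Normal", 4, false, 4 },
        { "DMA32",  2, false, 2 },
        { "DMA",    1, false, 1 },
};

/* Analogue of reset_alloc_batches(): refill every zone of the node. */
static void reset_alloc_batches(void)
{
        for (int i = 0; i < NR_ZONES; i++) {
                zones[i].alloc_batch = zones[i].batch_size;
                zones[i].fair_depleted = false;
        }
}

/*
 * Fair pass of the zonelist scan: skip depleted zones so that, over a full
 * cycle, pages are handed out in proportion to each zone's batch (and hence
 * its size).  When every zone has been skipped, refill and rescan.
 */
static struct fake_zone *alloc_fair(void)
{
        bool fair_skipped = false;

        for (int i = 0; i < NR_ZONES; i++) {
                struct fake_zone *z = &zones[i];

                if (z->fair_depleted) {
                        fair_skipped = true;
                        continue;
                }
                if (--z->alloc_batch <= 0)
                        z->fair_depleted = true;
                return z;
        }

        /* All zones depleted: reset the batches and retry the scan. */
        if (fair_skipped) {
                reset_alloc_batches();
                return alloc_fair();
        }
        return NULL;
}

int main(void)
{
        for (int i = 0; i < 10; i++)
                printf("allocation %d from %s\n", i, alloc_fair()->name);
        return 0;
}

Running the sketch shows allocations rotating through the zones in proportion
to their batch sizes. The patch's argument is that, with reclaim and LRU
aging now done per node rather than per zone, this spreading no longer
prevents any age inversion, while it still costs counter updates and flag
tests in the allocator fast path, which is why the whole mechanism can go.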