* [PATCH stable-4.9 1/2] mm, vmscan: cleanup lru size claculations
2017-02-28 15:11 [PATCH stable-4.9 0/2] mm: follow up oom fixes for 32b Michal Hocko
@ 2017-02-28 15:11 ` Michal Hocko
2017-02-28 15:11 ` [PATCH stable-4.9 2/2] mm, vmscan: consider eligible zones in get_scan_count Michal Hocko
2017-03-08 12:51 ` [PATCH stable-4.9 0/2] mm: follow up oom fixes for 32b Greg KH
2 siblings, 0 replies; 6+ messages in thread
From: Michal Hocko @ 2017-02-28 15:11 UTC (permalink / raw)
To: Stable tree
Cc: LKML, linux-mm, Michal Hocko, Andrew Morton, Linus Torvalds,
Hillf Danton, Johannes Weiner, Mel Gorman, Minchan Kim
From: Michal Hocko <mhocko@suse.com>
commit fd538803731e50367b7c59ce4ad3454426a3d671 upstream.
lruvec_lru_size returns the full size of the LRU list while we sometimes
need a value reduced only to eligible zones (e.g. for lowmem requests).
inactive_list_is_low is one such user. Later patches will add more of
them. Add a new parameter to lruvec_lru_size and allow it filter out
zones which are not eligible for the given context.
Link: http://lkml.kernel.org/r/20170117103702.28542-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
include/linux/mmzone.h | 2 +-
mm/vmscan.c | 81 ++++++++++++++++++++++++--------------------------
mm/workingset.c | 2 +-
3 files changed, 41 insertions(+), 44 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f99c993dd500..7e273e243a13 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -779,7 +779,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
#endif
}
-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru);
+extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fa30010a5277..cd516c632e8f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -234,22 +234,39 @@ bool pgdat_reclaimable(struct pglist_data *pgdat)
pgdat_reclaimable_pages(pgdat) * 6;
}
-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
+/**
+ * lruvec_lru_size - Returns the number of pages on the given LRU list.
+ * @lruvec: lru vector
+ * @lru: lru to use
+ * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list)
+ */
+unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
{
+ unsigned long lru_size;
+ int zid;
+
if (!mem_cgroup_disabled())
- return mem_cgroup_get_lru_size(lruvec, lru);
+ lru_size = mem_cgroup_get_lru_size(lruvec, lru);
+ else
+ lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
- return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
-}
+ for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
+ struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid];
+ unsigned long size;
-unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
- int zone_idx)
-{
- if (!mem_cgroup_disabled())
- return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
+ if (!managed_zone(zone))
+ continue;
+
+ if (!mem_cgroup_disabled())
+ size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
+ else
+ size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid],
+ NR_ZONE_LRU_BASE + lru);
+ lru_size -= min(size, lru_size);
+ }
+
+ return lru_size;
- return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
- NR_ZONE_LRU_BASE + lru);
}
/*
@@ -2028,11 +2045,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
struct scan_control *sc)
{
unsigned long inactive_ratio;
- unsigned long inactive;
- unsigned long active;
+ unsigned long inactive, active;
+ enum lru_list inactive_lru = file * LRU_FILE;
+ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
unsigned long gb;
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- int zid;
/*
* If we don't have swap space, anonymous page deactivation
@@ -2041,27 +2057,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
if (!file && !total_swap_pages)
return false;
- inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
- active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
-
- /*
- * For zone-constrained allocations, it is necessary to check if
- * deactivations are required for lowmem to be reclaimed. This
- * calculates the inactive/active pages available in eligible zones.
- */
- for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
- struct zone *zone = &pgdat->node_zones[zid];
- unsigned long inactive_zone, active_zone;
-
- if (!managed_zone(zone))
- continue;
-
- inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
- active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
-
- inactive -= min(inactive, inactive_zone);
- active -= min(active, active_zone);
- }
+ inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
+ active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
@@ -2208,7 +2205,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* system is under heavy pressure.
*/
if (!inactive_list_is_low(lruvec, true, sc) &&
- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -2234,10 +2231,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* anon in [0], file in [1]
*/
- anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) +
- lruvec_lru_size(lruvec, LRU_INACTIVE_ANON);
- file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) +
- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
+ anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
+ lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
+ file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) +
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
spin_lock_irq(&pgdat->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
@@ -2275,7 +2272,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
unsigned long size;
unsigned long scan;
- size = lruvec_lru_size(lruvec, lru);
+ size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES);
scan = size >> sc->priority;
if (!scan && pass && force_scan)
diff --git a/mm/workingset.c b/mm/workingset.c
index fb1f9183d89a..33f6f4db32fd 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -266,7 +266,7 @@ bool workingset_refault(void *shadow)
}
lruvec = mem_cgroup_lruvec(pgdat, memcg);
refault = atomic_long_read(&lruvec->inactive_age);
- active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
+ active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
rcu_read_unlock();
/*
--
2.11.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH stable-4.9 2/2] mm, vmscan: consider eligible zones in get_scan_count
2017-02-28 15:11 [PATCH stable-4.9 0/2] mm: follow up oom fixes for 32b Michal Hocko
2017-02-28 15:11 ` [PATCH stable-4.9 1/2] mm, vmscan: cleanup lru size claculations Michal Hocko
@ 2017-02-28 15:11 ` Michal Hocko
2017-03-06 18:00 ` MegaBrutal
2017-03-08 12:51 ` [PATCH stable-4.9 0/2] mm: follow up oom fixes for 32b Greg KH
2 siblings, 1 reply; 6+ messages in thread
From: Michal Hocko @ 2017-02-28 15:11 UTC (permalink / raw)
To: Stable tree
Cc: LKML, linux-mm, Michal Hocko, Andrew Morton, Linus Torvalds,
Hillf Danton, Johannes Weiner, Mel Gorman, Minchan Kim,
Trevor Cordes
From: Michal Hocko <mhocko@suse.com>
commit 71ab6cfe88dcf9f6e6a65eb85cf2bda20a257682 upstream.
get_scan_count() considers the whole node LRU size when
- doing SCAN_FILE due to many page cache inactive pages
- calculating the number of pages to scan
In both cases this might lead to unexpected behavior especially on 32b
systems where we can expect lowmem memory pressure very often.
A large highmem zone can easily distort SCAN_FILE heuristic because
there might be only few file pages from the eligible zones on the node
lru and we would still enforce file lru scanning which can lead to
trashing while we could still scan anonymous pages.
The later use of lruvec_lru_size can be problematic as well. Especially
when there are not many pages from the eligible zones. We would have to
skip over many pages to find anything to reclaim but shrink_node_memcg
would only reduce the remaining number to scan by SWAP_CLUSTER_MAX at
maximum. Therefore we can end up going over a large LRU many times
without actually having chance to reclaim much if anything at all. The
closer we are out of memory on lowmem zone the worse the problem will
be.
Fix this by filtering out all the ineligible zones when calculating the
lru size for both paths and consider only sc->reclaim_idx zones.
The patch would need to be tweaked a bit to apply to 4.10 and older but
I will do that as soon as it hits the Linus tree in the next merge
window.
Link: http://lkml.kernel.org/r/20170117103702.28542-3-mhocko@kernel.org
Fixes: b2e18757f2c9 ("mm, vmscan: begin reclaiming pages on a per-node basis")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Tested-by: Trevor Cordes <trevor@tecnopolis.ca>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
mm/vmscan.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cd516c632e8f..30a88b945a44 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2205,7 +2205,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* system is under heavy pressure.
*/
if (!inactive_list_is_low(lruvec, true, sc) &&
- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES) >> sc->priority) {
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -2272,7 +2272,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
unsigned long size;
unsigned long scan;
- size = lruvec_lru_size(lruvec, lru, MAX_NR_ZONES);
+ size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
scan = size >> sc->priority;
if (!scan && pass && force_scan)
--
2.11.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH stable-4.9 0/2] mm: follow up oom fixes for 32b
2017-02-28 15:11 [PATCH stable-4.9 0/2] mm: follow up oom fixes for 32b Michal Hocko
2017-02-28 15:11 ` [PATCH stable-4.9 1/2] mm, vmscan: cleanup lru size claculations Michal Hocko
2017-02-28 15:11 ` [PATCH stable-4.9 2/2] mm, vmscan: consider eligible zones in get_scan_count Michal Hocko
@ 2017-03-08 12:51 ` Greg KH
2 siblings, 0 replies; 6+ messages in thread
From: Greg KH @ 2017-03-08 12:51 UTC (permalink / raw)
To: Michal Hocko
Cc: Stable tree, LKML, linux-mm, Andrew Morton, Hillf Danton,
Johannes Weiner, Linus Torvalds, Mel Gorman, Michal Hocko,
Minchan Kim, Trevor Cordes
On Tue, Feb 28, 2017 at 04:11:06PM +0100, Michal Hocko wrote:
> Hi,
> later in the 4.10 release cycle it turned out that b4536f0c829c ("mm,
> memcg: fix the active list aging for lowmem requests when memcg is
> enabled") was not sufficient to fully close the regression introduced by
> f8d1a31163fc ("mm: consider whether to decivate based on eligible zones
> inactive ratio") [1]. mmotm tree behaved properly and it turned out the
> Linus tree was missing 71ab6cfe88dc ("mm, vmscan: consider eligible
> zones in get_scan_count") merged in 4.11 merge window. The patch heavily
> depends on 4a9494a3d827 ("mm, vmscan: cleanup lru size claculations")
> which has been backported as well (patch 1).
>
> Please add these two to 4.9+ trees (they should apply to 4.10 as they
> are). 4.8 tree will need them as well but I do not see this stable tree
> being maintained.
For 4.10-stable I needed both of your backports, many thanks for them.
greg k-h
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 6+ messages in thread