From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Benjamin Manes <ben.manes@gmail.com>,
	Dave Chinner <david@fromorbit.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Jonathan Corbet <corbet@lwn.net>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	Matthew Wilcox <willy@infradead.org>,
	Mel Gorman <mgorman@suse.de>, Miaohe Lin <linmiaohe@huawei.com>,
	Michael Larabel <michael@michaellarabel.com>,
	Michal Hocko <mhocko@suse.com>,
	Michel Lespinasse <michel@lespinasse.org>,
	Rik van Riel <riel@surriel.com>, Roman Gushchin <guro@fb.com>,
	Rong Chen <rong.a.chen@intel.com>,
	SeongJae Park <sjpark@amazon.de>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	Vlastimil Babka <vbabka@suse.cz>, Yang Shi <shy828301@gmail.com>,
	Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
	linux-kernel@vger.kernel.org, lkp@lists.01.org,
	page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Subject: [PATCH v2 12/16] mm: multigenerational lru: eviction
Date: Tue, 13 Apr 2021 00:56:29 -0600
Message-ID: <20210413065633.2782273-13-yuzhao@google.com>
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>

The eviction consumes old generations. Given an lruvec, it scans the
pages on the per-zone lists indexed by one of the two entries of
min_seq[2], i.e., the oldest generation of the anon or the file type.
It first tries to select a type based on the values of min_seq[2];
when anon and file pages are both available from the same generation,
it selects the type that has the lower refault rate.

During a scan, the eviction re-sorts pages whose generation numbers
the aging has updated, i.e., pages it has found referenced, by moving
them to the lists of their new generations. It also moves pages from
tiers that have higher refault rates than tier 0 to the next
generation. Once all the per-zone lists of the selected type are
empty, the eviction increments the entry of min_seq[2] indexed by
that type.
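
For reference, the per-page decision during a scan, simplified from
sort_page() in the diff below (an illustrative pseudo-C sketch only;
the helper name is made up, and locking, statistics and the lazy-free
case are omitted):

	/* returns true if the page was dealt with in place, i.e., not isolated */
	static bool sort_page_sketch(struct page *page, struct lruvec *lruvec,
				     int tier_to_isolate)
	{
		struct lrugen *lrugen = &lruvec->evictable;
		int gen = page_lru_gen(page);
		int file = page_is_file_lru(page);
		int tier = lru_tier_from_usage(page_tier_usage(page));

		/* the aging has updated the generation number: move to that list */
		if (gen != lru_gen_from_seq(lrugen->min_seq[file])) {
			list_move(&page->lru,
				  &lrugen->lists[gen][file][page_zonenum(page)]);
			return true;
		}

		/* hotter than the tier being evicted: move to the next generation */
		if (tier > tier_to_isolate) {
			page_inc_gen(page, lruvec, false);
			return true;
		}

		return false;	/* eligible for isolation */
	}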

Signed-off-by: Yu Zhao <yuzhao@google.com>
---
 mm/vmscan.c | 341 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 341 insertions(+)
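
In case it helps review, a rough sketch of how a caller could drive the
new path (hypothetical pseudo-C; the caller name is made up, the actual
wiring into the reclaim paths presumably comes later in the series, and
the functions called below are the ones added by this patch):

	static void evict_sketch(struct lruvec *lruvec, struct scan_control *sc,
				 int swappiness)
	{
		/* scan budget, in base pages */
		long nr_to_scan = SWAP_CLUSTER_MAX;

		/*
		 * Each call isolates a batch from the oldest generation via
		 * isolate_lru_gen_pages(), reclaims it with shrink_page_list()
		 * and puts rejected pages back; it returns true as long as
		 * continuing is worthwhile.
		 */
		while (evict_lru_gen_pages(lruvec, sc, swappiness, &nr_to_scan))
			cond_resched();
	}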

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 31e1b4155677..6239b1acd84f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5468,6 +5468,347 @@ static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq,
 	return true;
 }
 
+/******************************************************************************
+ *                          the eviction
+ ******************************************************************************/
+
+static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_to_isolate)
+{
+	bool success;
+	int gen = page_lru_gen(page);
+	int file = page_is_file_lru(page);
+	int zone = page_zonenum(page);
+	int tier = lru_tier_from_usage(page_tier_usage(page));
+	struct lrugen *lrugen = &lruvec->evictable;
+
+	VM_BUG_ON_PAGE(gen == -1, page);
+	VM_BUG_ON_PAGE(tier_to_isolate < 0, page);
+
+	/* a lazy-free page that has been written into? */
+	if (file && PageDirty(page) && PageAnon(page)) {
+		success = lru_gen_deletion(page, lruvec);
+		VM_BUG_ON_PAGE(!success, page);
+		SetPageSwapBacked(page);
+		add_page_to_lru_list_tail(page, lruvec);
+		return true;
+	}
+
+	/* page_update_gen() has updated the page? */
+	if (gen != lru_gen_from_seq(lrugen->min_seq[file])) {
+		list_move(&page->lru, &lrugen->lists[gen][file][zone]);
+		return true;
+	}
+
+	/* activate the page if its tier has a higher refault rate */
+	if (tier_to_isolate < tier) {
+		int sid = sid_from_seq_or_gen(gen);
+
+		page_inc_gen(page, lruvec, false);
+		WRITE_ONCE(lrugen->activated[sid][file][tier - 1],
+			   lrugen->activated[sid][file][tier - 1] + thp_nr_pages(page));
+		inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file);
+		return true;
+	}
+
+	/*
+	 * A page can't be immediately evicted, and page_inc_gen() will mark it
+	 * for reclaim and hopefully writeback will write it soon if it's dirty.
+	 */
+	if (PageLocked(page) || PageWriteback(page) || (file && PageDirty(page))) {
+		page_inc_gen(page, lruvec, true);
+		return true;
+	}
+
+	return false;
+}
+
+static bool should_skip_page(struct page *page, struct scan_control *sc)
+{
+	if (!sc->may_unmap && page_mapped(page))
+		return true;
+
+	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
+	    (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page))))
+		return true;
+
+	if (!get_page_unless_zero(page))
+		return true;
+
+	if (!TestClearPageLRU(page)) {
+		put_page(page);
+		return true;
+	}
+
+	return false;
+}
+
+static void isolate_page(struct page *page, struct lruvec *lruvec)
+{
+	bool success;
+
+	success = lru_gen_deletion(page, lruvec);
+	VM_BUG_ON_PAGE(!success, page);
+
+	if (PageActive(page)) {
+		ClearPageActive(page);
+		/* make sure shrink_page_list() rejects this page */
+		SetPageReferenced(page);
+		return;
+	}
+
+	/* make sure shrink_page_list() doesn't try to write this page */
+	ClearPageReclaim(page);
+	/* make sure shrink_page_list() doesn't reject this page */
+	ClearPageReferenced(page);
+}
+
+static int scan_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc,
+			      long *nr_to_scan, int file, int tier,
+			      struct list_head *list)
+{
+	bool success;
+	int gen, zone;
+	enum vm_event_item item;
+	int sorted = 0;
+	int scanned = 0;
+	int isolated = 0;
+	int batch_size = 0;
+	struct lrugen *lrugen = &lruvec->evictable;
+
+	VM_BUG_ON(!list_empty(list));
+
+	if (get_nr_gens(lruvec, file) == MIN_NR_GENS)
+		return -ENOENT;
+
+	gen = lru_gen_from_seq(lrugen->min_seq[file]);
+
+	for (zone = sc->reclaim_idx; zone >= 0; zone--) {
+		LIST_HEAD(moved);
+		int skipped = 0;
+		struct list_head *head = &lrugen->lists[gen][file][zone];
+
+		while (!list_empty(head)) {
+			struct page *page = lru_to_page(head);
+			int delta = thp_nr_pages(page);
+
+			VM_BUG_ON_PAGE(PageTail(page), page);
+			VM_BUG_ON_PAGE(PageUnevictable(page), page);
+			VM_BUG_ON_PAGE(PageActive(page), page);
+			VM_BUG_ON_PAGE(page_is_file_lru(page) != file, page);
+			VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
+
+			prefetchw_prev_lru_page(page, head, flags);
+
+			scanned += delta;
+
+			if (sort_page(page, lruvec, tier))
+				sorted += delta;
+			else if (should_skip_page(page, sc)) {
+				list_move(&page->lru, &moved);
+				skipped += delta;
+			} else {
+				isolate_page(page, lruvec);
+				list_add(&page->lru, list);
+				isolated += delta;
+			}
+
+			if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
+			    ++batch_size == MAX_BATCH_SIZE)
+				break;
+		}
+
+		list_splice(&moved, head);
+		__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
+
+		if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
+		    batch_size == MAX_BATCH_SIZE)
+			break;
+	}
+
+	success = try_inc_min_seq(lruvec, file);
+
+	item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+	if (!cgroup_reclaim(sc))
+		__count_vm_events(item, scanned);
+	__count_memcg_events(lruvec_memcg(lruvec), item, scanned);
+	__count_vm_events(PGSCAN_ANON + file, scanned);
+
+	*nr_to_scan -= scanned;
+
+	if (*nr_to_scan <= 0 || success || isolated)
+		return isolated;
+	/*
+	 * We may have trouble finding eligible pages due to reclaim_idx,
+	 * may_unmap and may_writepage. The following check makes sure we won't
+	 * be stuck if we aren't making enough progress.
+	 */
+	return batch_size == MAX_BATCH_SIZE && sorted >= SWAP_CLUSTER_MAX ? 0 : -ENOENT;
+}
+
+static int get_tier_to_isolate(struct lruvec *lruvec, int file)
+{
+	int tier;
+	struct controller_pos sp, pv;
+
+	/*
+	 * Ideally we don't want to evict upper tiers that have higher refault
+	 * rates. However, we need to leave some margin for the fluctuation in
+	 * refault rates. So we use a larger gain factor to make sure upper
+	 * tiers are indeed more active. We choose 2 because the lowest upper
+	 * tier would have twice the refault rate of the base tier, according
+	 * to their numbers of accesses.
+	 */
+	read_controller_pos(&sp, lruvec, file, 0, 1);
+	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
+		read_controller_pos(&pv, lruvec, file, tier, 2);
+		if (!positive_ctrl_err(&sp, &pv))
+			break;
+	}
+
+	return tier - 1;
+}
+
+static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_to_isolate)
+{
+	int file, tier;
+	struct controller_pos sp, pv;
+	int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
+
+	/*
+	 * Compare the refault rates between the base tiers of anon and file to
+	 * determine which type to evict. Also need to compare the refault rates
+	 * of the upper tiers of the selected type with that of the base tier to
+	 * determine which tier of the selected type to evict.
+	 */
+	read_controller_pos(&sp, lruvec, 0, 0, gain[0]);
+	read_controller_pos(&pv, lruvec, 1, 0, gain[1]);
+	file = positive_ctrl_err(&sp, &pv);
+
+	read_controller_pos(&sp, lruvec, !file, 0, gain[!file]);
+	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
+		read_controller_pos(&pv, lruvec, file, tier, gain[file]);
+		if (!positive_ctrl_err(&sp, &pv))
+			break;
+	}
+
+	*tier_to_isolate = tier - 1;
+
+	return file;
+}
+
+static int isolate_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc,
+				 int swappiness, long *nr_to_scan, int *type_to_scan,
+				 struct list_head *list)
+{
+	int i;
+	int file;
+	int isolated;
+	int tier = -1;
+	DEFINE_MAX_SEQ();
+	DEFINE_MIN_SEQ();
+
+	VM_BUG_ON(!seq_is_valid(lruvec));
+
+	if (max_nr_gens(max_seq, min_seq, swappiness) == MIN_NR_GENS)
+		return 0;
+	/*
+	 * Try to select a type based on generations and swappiness, and if that
+	 * fails, fall back to get_type_to_scan(). When anon and file are both
+	 * available from the same generation, swappiness 200 is interpreted as
+	 * anon first and swappiness 1 is interpreted as file first.
+	 */
+	file = !swappiness || min_seq[0] > min_seq[1] ||
+	       (min_seq[0] == min_seq[1] && swappiness != 200 &&
+		(swappiness == 1 || get_type_to_scan(lruvec, swappiness, &tier)));
+
+	if (tier == -1)
+		tier = get_tier_to_isolate(lruvec, file);
+
+	for (i = !swappiness; i < ANON_AND_FILE; i++) {
+		isolated = scan_lru_gen_pages(lruvec, sc, nr_to_scan, file, tier, list);
+		if (isolated >= 0)
+			break;
+
+		file = !file;
+		tier = get_tier_to_isolate(lruvec, file);
+	}
+
+	if (isolated < 0)
+		isolated = *nr_to_scan = 0;
+
+	*type_to_scan = file;
+
+	return isolated;
+}
+
+/* Main function used by foreground, background and user-triggered eviction. */
+static bool evict_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc,
+				int swappiness, long *nr_to_scan)
+{
+	int file;
+	int isolated;
+	int reclaimed;
+	LIST_HEAD(list);
+	struct page *page;
+	enum vm_event_item item;
+	struct reclaim_stat stat;
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+	spin_lock_irq(&lruvec->lru_lock);
+
+	isolated = isolate_lru_gen_pages(lruvec, sc, swappiness, nr_to_scan, &file, &list);
+	VM_BUG_ON(list_empty(&list) == !!isolated);
+
+	if (isolated)
+		__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, isolated);
+
+	spin_unlock_irq(&lruvec->lru_lock);
+
+	if (!isolated)
+		goto done;
+
+	reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
+	/*
+	 * We need to prevent rejected pages from being added back to the same
+	 * lists they were isolated from. Otherwise we may risk looping on them
+	 * forever. We use PageActive() or !PageReferenced() && PageWorkingset()
+	 * to tell lru_gen_addition() not to add them to the oldest generation.
+	 */
+	list_for_each_entry(page, &list, lru) {
+		if (PageMlocked(page))
+			continue;
+
+		if (PageReferenced(page)) {
+			SetPageActive(page);
+			ClearPageReferenced(page);
+		} else {
+			ClearPageActive(page);
+			SetPageWorkingset(page);
+		}
+	}
+
+	spin_lock_irq(&lruvec->lru_lock);
+
+	move_pages_to_lru(lruvec, &list);
+
+	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -isolated);
+
+	item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+	if (!cgroup_reclaim(sc))
+		__count_vm_events(item, reclaimed);
+	__count_memcg_events(lruvec_memcg(lruvec), item, reclaimed);
+	__count_vm_events(PGSTEAL_ANON + file, reclaimed);
+
+	spin_unlock_irq(&lruvec->lru_lock);
+
+	mem_cgroup_uncharge_list(&list);
+	free_unref_page_list(&list);
+
+	sc->nr_reclaimed += reclaimed;
+done:
+	return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim;
+}
+
 /******************************************************************************
  *                          state change
  ******************************************************************************/
-- 
2.31.1.295.g9ea45b61b8-goog

