From: Muchun Song <songmuchun@bytedance.com>
To: guro@fb.com, hannes@cmpxchg.org, mhocko@kernel.org,
	akpm@linux-foundation.org, shakeelb@google.com,
	vdavydov.dev@gmail.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	duanxiongchun@bytedance.com, fam.zheng@bytedance.com,
	bsingharora@gmail.com, shy828301@gmail.com,
	alex.shi@linux.alibaba.com,
	Muchun Song <songmuchun@bytedance.com>
Subject: [RFC PATCH v2 18/18] mm: lru: use lruvec lock to serialize memcg changes
Date: Fri,  9 Apr 2021 20:29:59 +0800
Message-ID: <20210409122959.82264-19-songmuchun@bytedance.com>
In-Reply-To: <20210409122959.82264-1-songmuchun@bytedance.com>

As described by commit fc574c23558c ("mm/swap.c: serialize memcg
changes in pagevec_lru_move_fn"), TestClearPageLRU() aims to
serialize mem_cgroup_move_account() during pagevec_lru_move_fn().
Now lock_page_lruvec*() can detect whether the memcg of a page has
been changed, so the lruvec lock itself can serialize
mem_cgroup_move_account() during pagevec_lru_move_fn(). This change
is a partial revert of commit fc574c23558c ("mm/swap.c: serialize
memcg changes in pagevec_lru_move_fn").
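
For reference, here is the handshake this relies on, condensed from
the hunks below into one place (a simplified sketch of the pattern,
not the verbatim kernel source):

	/*
	 * Reader side -- lock_page_lruvec(): look up the lruvec and
	 * take its lock, then recheck the page's memcg under the
	 * lock. If the page was moved to another memcg after the
	 * lookup, the wrong lock is held and we must retry.
	 */
retry:
	lruvec = mem_cgroup_page_lruvec(page);
	spin_lock(&lruvec->lru_lock);
	if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
		spin_unlock(&lruvec->lru_lock);
		goto retry;
	}

	/*
	 * Writer side -- mem_cgroup_move_account(): switch
	 * page->memcg_data only while holding the old lruvec's lock,
	 * which is what makes the reader's recheck above sufficient.
	 */
	spin_lock(&from_vec->lru_lock);
	page->memcg_data = (unsigned long)to->objcg;
	spin_unlock(&from_vec->lru_lock);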

Moreover, pagevec_lru_move_fn() is hotter than
mem_cgroup_move_account(), so removing an atomic operation from the
former is a worthwhile optimization. This change also avoids
dirtying the cacheline of a page which is not on the LRU.
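
To illustrate the cost argument, here is a hypothetical move_fn
callback under the new contract (the function name is made up for
illustration; compare pagevec_move_tail_fn() in the diff below):

	/* The caller, pagevec_lru_move_fn(), already holds the lruvec lock. */
	static void example_move_fn(struct page *page, struct lruvec *lruvec)
	{
		/*
		 * PageLRU() is a plain bit test here, unlike the old
		 * TestClearPageLRU(), which was an atomic RMW that
		 * dirtied the page->flags cacheline even for a page
		 * that is not on the LRU.
		 */
		if (!PageLRU(page) || PageUnevictable(page))
			return;

		del_page_from_lru_list(page, lruvec);
		add_page_to_lru_list_tail(page, lruvec);
	}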

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/compaction.c |  1 +
 mm/memcontrol.c | 31 +++++++++++++++++++++++++++++++
 mm/swap.c       | 41 +++++++++++------------------------------
 mm/vmscan.c     |  9 ++++-----
 4 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 5fd37e14404f..382e40ccc694 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -531,6 +531,7 @@ compact_lock_page_lruvec_irqsave(struct page *page, unsigned long *flags,
 
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
 out:
+	/* See the comments in lock_page_lruvec(). */
 	if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
 		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
 		goto retry;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 403b0743338b..1017e92f1d82 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1323,12 +1323,38 @@ struct lruvec *lock_page_lruvec(struct page *page)
 	lruvec = mem_cgroup_page_lruvec(page);
 	spin_lock(&lruvec->lru_lock);
 
+	/*
+	 * The memcg of the page can be changed by any of the following routines:
+	 *
+	 * 1) mem_cgroup_move_account() or
+	 * 2) memcg_reparent_objcgs()
+	 *
+	 * A possible bad scenario looks like this:
+	 *
+	 * CPU0:                CPU1:                CPU2:
+	 * lruvec = mem_cgroup_page_lruvec()
+	 *
+	 *                      if (!isolate_lru_page())
+	 *                              mem_cgroup_move_account()
+	 *
+	 *                                           memcg_reparent_objcgs()
+	 *
+	 * spin_lock(&lruvec->lru_lock)
+	 *                ^^^^^^
+	 *              wrong lock
+	 *
+	 * Either CPU1 or CPU2 can change the page's memcg, so we need to
+	 * check whether it has changed under the lock; if so, we must drop
+	 * the lock, retry, and acquire the new lruvec lock.
+	 */
 	if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
 		spin_unlock(&lruvec->lru_lock);
 		goto retry;
 	}
 
 	/*
+	 * When we reach here, page_memcg(page) is stable.
+	 *
 	 * Preemption is disabled in the internal of spin_lock, which can serve
 	 * as RCU read-side critical sections.
 	 */
@@ -1346,6 +1372,7 @@ struct lruvec *lock_page_lruvec_irq(struct page *page)
 	lruvec = mem_cgroup_page_lruvec(page);
 	spin_lock_irq(&lruvec->lru_lock);
 
+	/* See the comments in lock_page_lruvec(). */
 	if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
 		spin_unlock_irq(&lruvec->lru_lock);
 		goto retry;
@@ -1366,6 +1393,7 @@ struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
 	lruvec = mem_cgroup_page_lruvec(page);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
 
+	/* See the comments in lock_page_lruvec(). */
 	if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
 		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
 		goto retry;
@@ -5687,7 +5715,10 @@ static int mem_cgroup_move_account(struct page *page,
 	obj_cgroup_get(to->objcg);
 	obj_cgroup_put(from->objcg);
 
+	/* See the comments in lock_page_lruvec(). */
+	spin_lock(&from_vec->lru_lock);
 	page->memcg_data = (unsigned long)to->objcg;
+	spin_unlock(&from_vec->lru_lock);
 
 	__unlock_page_objcg(from->objcg);
 
diff --git a/mm/swap.c b/mm/swap.c
index f3ce307d09fa..48e66a05c913 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -211,14 +211,8 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
 
-		/* block memcg migration during page moving between lru */
-		if (!TestClearPageLRU(page))
-			continue;
-
 		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
 		(*move_fn)(page, lruvec);
-
-		SetPageLRU(page);
 	}
 	if (lruvec)
 		unlock_page_lruvec_irqrestore(lruvec, flags);
@@ -228,7 +222,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 
 static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec)
 {
-	if (!PageUnevictable(page)) {
+	if (PageLRU(page) && !PageUnevictable(page)) {
 		del_page_from_lru_list(page, lruvec);
 		ClearPageActive(page);
 		add_page_to_lru_list_tail(page, lruvec);
@@ -324,7 +318,7 @@ void lru_note_cost_page(struct page *page)
 
 static void __activate_page(struct page *page, struct lruvec *lruvec)
 {
-	if (!PageActive(page) && !PageUnevictable(page)) {
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		int nr_pages = thp_nr_pages(page);
 
 		del_page_from_lru_list(page, lruvec);
@@ -377,12 +371,9 @@ static void activate_page(struct page *page)
 	struct lruvec *lruvec;
 
 	page = compound_head(page);
-	if (TestClearPageLRU(page)) {
-		lruvec = lock_page_lruvec_irq(page);
-		__activate_page(page, lruvec);
-		unlock_page_lruvec_irq(lruvec);
-		SetPageLRU(page);
-	}
+	lruvec = lock_page_lruvec_irq(page);
+	__activate_page(page, lruvec);
+	unlock_page_lruvec_irq(lruvec);
 }
 #endif
 
@@ -537,6 +528,9 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 	bool active = PageActive(page);
 	int nr_pages = thp_nr_pages(page);
 
+	if (!PageLRU(page))
+		return;
+
 	if (PageUnevictable(page))
 		return;
 
@@ -574,7 +568,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 
 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 {
-	if (PageActive(page) && !PageUnevictable(page)) {
+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
 		int nr_pages = thp_nr_pages(page);
 
 		del_page_from_lru_list(page, lruvec);
@@ -590,7 +584,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 
 static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec)
 {
-	if (PageAnon(page) && PageSwapBacked(page) &&
+	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
 	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		int nr_pages = thp_nr_pages(page);
 
@@ -1055,20 +1049,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
  */
 void __pagevec_lru_add(struct pagevec *pvec)
 {
-	int i;
-	struct lruvec *lruvec = NULL;
-	unsigned long flags = 0;
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-
-		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
-		__pagevec_lru_add_fn(page, lruvec);
-	}
-	if (lruvec)
-		unlock_page_lruvec_irqrestore(lruvec, flags);
-	release_pages(pvec->pages, pvec->nr);
-	pagevec_reinit(pvec);
+	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn);
 }
 
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index af0fc8110bdc..12c2ea6cb6f3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4469,18 +4469,17 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 		nr_pages = thp_nr_pages(page);
 		pgscanned += nr_pages;
 
-		/* block memcg migration during page moving between lru */
-		if (!TestClearPageLRU(page))
+		lruvec = relock_page_lruvec_irq(page, lruvec);
+
+		if (!PageLRU(page) || !PageUnevictable(page))
 			continue;
 
-		lruvec = relock_page_lruvec_irq(page, lruvec);
-		if (page_evictable(page) && PageUnevictable(page)) {
+		if (page_evictable(page)) {
 			del_page_from_lru_list(page, lruvec);
 			ClearPageUnevictable(page);
 			add_page_to_lru_list(page, lruvec);
 			pgrescued += nr_pages;
 		}
-		SetPageLRU(page);
 	}
 
 	if (lruvec) {
-- 
2.11.0



Thread overview: 39+ messages
2021-04-09 12:29 [RFC PATCH v2 00/18] Use obj_cgroup APIs to charge the LRU pages Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 01/18] mm: memcontrol: fix page charging in page replacement Muchun Song
2021-04-10 17:44   ` Shakeel Butt
2021-04-09 12:29 ` [RFC PATCH v2 02/18] mm: memcontrol: bail out early when !mm in get_mem_cgroup_from_mm Muchun Song
2021-04-10 17:47   ` Shakeel Butt
2021-04-09 12:29 ` [RFC PATCH v2 03/18] mm: memcontrol: remove the pgdata parameter of mem_cgroup_page_lruvec Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 04/18] mm: memcontrol: simplify lruvec_holds_page_lru_lock Muchun Song
2021-04-09 16:00   ` Johannes Weiner
2021-04-11  5:37     ` [External] " Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 05/18] mm: memcontrol: simplify the logic of objcg pinning memcg Muchun Song
2021-04-09 16:27   ` Johannes Weiner
2021-04-09 12:29 ` [RFC PATCH v2 06/18] mm: memcontrol: move the objcg infrastructure out of CONFIG_MEMCG_KMEM Muchun Song
2021-04-09 16:56   ` Johannes Weiner
2021-04-11  6:24     ` [External] " Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 07/18] mm: memcontrol: introduce compact_lock_page_lruvec_irqsave Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 08/18] mm: memcontrol: make lruvec lock safe when the LRU pages reparented Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 09/18] mm: vmscan: remove noinline_for_stack Muchun Song
2021-04-09 18:31   ` Johannes Weiner
2021-04-10  4:33     ` [External] " Muchun Song
2021-04-10 17:48       ` Shakeel Butt
2021-04-10  0:49   ` Roman Gushchin
2021-04-09 12:29 ` [RFC PATCH v2 10/18] mm: vmscan: rework move_pages_to_lru() Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 11/18] mm: thp: introduce lock/unlock_split_queue{_irqsave}() Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 12/18] mm: thp: make deferred split queue lock safe when the LRU pages reparented Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 13/18] mm: memcontrol: make all the callers of page_memcg() safe Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 14/18] mm: memcontrol: introduce memcg_reparent_ops Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 15/18] mm: memcontrol: use obj_cgroup APIs to charge the LRU pages Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 16/18] mm: memcontrol: rename {un}lock_page_memcg() to {un}lock_page_objcg() Muchun Song
2021-04-09 12:29 ` [RFC PATCH v2 17/18] mm: lru: add VM_BUG_ON_PAGE to lru maintenance function Muchun Song
2021-04-09 12:29 ` Muchun Song [this message]
2021-04-10  1:29 ` [RFC PATCH v2 00/18] Use obj_cgroup APIs to charge the LRU pages Roman Gushchin
2021-04-12 17:14   ` Johannes Weiner
2021-04-12 17:45     ` Roman Gushchin
