All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Shi <alex.shi@linux.alibaba.com>
To: akpm@linux-foundation.org, mgorman@techsingularity.net,
	tj@kernel.org, hughd@google.com, khlebnikov@yandex-team.ru,
	daniel.m.jordan@oracle.com, yang.shi@linux.alibaba.com,
	willy@infradead.org, hannes@cmpxchg.org, lkp@intel.com,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	cgroups@vger.kernel.org, shakeelb@google.com,
	iamjoonsoo.kim@lge.com, richard.weiyang@gmail.com
Subject: [PATCH v13 13/18] mm/swap: serialize memcg changes during pagevec_lru_move_fn
Date: Fri, 19 Jun 2020 16:33:51 +0800	[thread overview]
Message-ID: <1592555636-115095-14-git-send-email-alex.shi@linux.alibaba.com> (raw)
In-Reply-To: <1592555636-115095-1-git-send-email-alex.shi@linux.alibaba.com>

Hugh Dickins found a memcg change bug in the original version:
If we want to change the pgdat->lru_lock to memcg's lruvec lock, we have
to serialize mem_cgroup_move_account during pagevec_lru_move_fn. The
possible bad scenario would look like:

	cpu 0					cpu 1
lruvec = mem_cgroup_page_lruvec()
					if (!isolate_lru_page())
						mem_cgroup_move_account

spin_lock_irqsave(&lruvec->lru_lock <== wrong lock.

So we need ClearPageLRU to block isolate_lru_page(), which in turn
serializes the memcg change here.

Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 mm/swap.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index 7ff9aa6a716a..cebb25ddeee7 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -203,7 +203,7 @@ int get_kernel_page(unsigned long start, int write, struct page **pages)
 EXPORT_SYMBOL_GPL(get_kernel_page);
 
 static void pagevec_lru_move_fn(struct pagevec *pvec,
-	void (*move_fn)(struct page *page, struct lruvec *lruvec))
+	void (*move_fn)(struct page *page, struct lruvec *lruvec), bool add)
 {
 	int i;
 	struct pglist_data *pgdat = NULL;
@@ -221,8 +221,15 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 			spin_lock_irqsave(&pgdat->lru_lock, flags);
 		}
 
+		/* new page add to lru or page moving between lru */
+		if (!add && !TestClearPageLRU(page))
+			continue;
+
 		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		(*move_fn)(page, lruvec);
+
+		if (!add)
+			SetPageLRU(page);
 	}
 	if (pgdat)
 		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
@@ -259,7 +266,7 @@ void rotate_reclaimable_page(struct page *page)
 		local_lock_irqsave(&lru_rotate.lock, flags);
 		pvec = this_cpu_ptr(&lru_rotate.pvec);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
-			pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
+			pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, false);
 		local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 }
@@ -325,7 +332,7 @@ static void activate_page_drain(int cpu)
 	struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu);
 
 	if (pagevec_count(pvec))
-		pagevec_lru_move_fn(pvec, __activate_page);
+		pagevec_lru_move_fn(pvec, __activate_page, false);
 }
 
 static bool need_activate_page_drain(int cpu)
@@ -343,7 +350,7 @@ void activate_page(struct page *page)
 		pvec = this_cpu_ptr(&lru_pvecs.activate_page);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
-			pagevec_lru_move_fn(pvec, __activate_page);
+			pagevec_lru_move_fn(pvec, __activate_page, false);
 		local_unlock(&lru_pvecs.lock);
 	}
 }
@@ -620,21 +627,21 @@ void lru_add_drain_cpu(int cpu)
 
 		/* No harm done if a racing interrupt already did this */
 		local_lock_irqsave(&lru_rotate.lock, flags);
-		pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
+		pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, false);
 		local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 
 	pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
 	if (pagevec_count(pvec))
-		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
+		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, false);
 
 	pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu);
 	if (pagevec_count(pvec))
-		pagevec_lru_move_fn(pvec, lru_deactivate_fn);
+		pagevec_lru_move_fn(pvec, lru_deactivate_fn, false);
 
 	pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu);
 	if (pagevec_count(pvec))
-		pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
+		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, false);
 
 	activate_page_drain(cpu);
 }
@@ -663,7 +670,7 @@ void deactivate_file_page(struct page *page)
 		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
 
 		if (!pagevec_add(pvec, page) || PageCompound(page))
-			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
+			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, false);
 		local_unlock(&lru_pvecs.lock);
 	}
 }
@@ -685,7 +692,7 @@ void deactivate_page(struct page *page)
 		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
-			pagevec_lru_move_fn(pvec, lru_deactivate_fn);
+			pagevec_lru_move_fn(pvec, lru_deactivate_fn, false);
 		local_unlock(&lru_pvecs.lock);
 	}
 }
@@ -707,7 +714,7 @@ void mark_page_lazyfree(struct page *page)
 		pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
-			pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
+			pagevec_lru_move_fn(pvec, lru_lazyfree_fn, false);
 		local_unlock(&lru_pvecs.lock);
 	}
 }
@@ -975,7 +982,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
  */
 void __pagevec_lru_add(struct pagevec *pvec)
 {
-	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn);
+	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, true);
 }
 
 /**
-- 
1.8.3.1


  parent reply	other threads:[~2020-06-19  8:36 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-19  8:33 [PATCH v13 00/18] per memcg lru lock Alex Shi
2020-06-19  8:33 ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 01/18] mm/vmscan: remove unnecessary lruvec adding Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 02/18] mm/page_idle: no unlikely double check for idle page counting Alex Shi
2020-06-19  8:33 ` [PATCH v13 03/18] mm/compaction: correct the comments of compact_defer_shift Alex Shi
2020-06-19  8:33 ` [PATCH v13 04/18] mm/compaction: rename compact_deferred as compact_should_defer Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 05/18] mm/thp: move lru_add_page_tail func to huge_memory.c Alex Shi
2020-06-19  8:33 ` [PATCH v13 06/18] mm/thp: clean up lru_add_page_tail Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 07/18] mm/thp: narrow lru locking Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 08/18] mm/memcg: add debug checking in lock_page_memcg Alex Shi
2020-06-19  8:33 ` [PATCH v13 09/18] mm/swap: fold vm event PGROTATED into pagevec_move_tail_fn Alex Shi
2020-06-19  8:33 ` [PATCH v13 10/18] mm/lru: introduce TestClearPageLRU Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 11/18] mm/compaction: do page isolation first in compaction Alex Shi
2020-06-19  8:33 ` [PATCH v13 12/18] mm/mlock: reorder isolation sequence during munlock Alex Shi
2020-06-19  8:33 ` Alex Shi [this message]
2020-06-19  8:33 ` [PATCH v13 14/18] mm/lru: replace pgdat lru_lock with lruvec lock Alex Shi
2020-06-19  8:33 ` [PATCH v13 15/18] mm/lru: introduce the relock_page_lruvec function Alex Shi
2020-06-19  8:33 ` [PATCH v13 16/18] mm/vmscan: use relock for move_pages_to_lru Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 17/18] mm/pgdat: remove pgdat lru_lock Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-19  8:33 ` [PATCH v13 18/18] mm/lru: revise the comments of lru_lock Alex Shi
2020-06-19  8:33   ` Alex Shi
2020-06-20 23:08 ` [PATCH v13 00/18] per memcg lru lock Andrew Morton
2020-06-20 23:08   ` Andrew Morton
2020-06-21 15:44   ` Alex Shi
2020-06-21 15:44     ` Alex Shi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1592555636-115095-14-git-send-email-alex.shi@linux.alibaba.com \
    --to=alex.shi@linux.alibaba.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=daniel.m.jordan@oracle.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=khlebnikov@yandex-team.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lkp@intel.com \
    --cc=mgorman@techsingularity.net \
    --cc=richard.weiyang@gmail.com \
    --cc=shakeelb@google.com \
    --cc=tj@kernel.org \
    --cc=willy@infradead.org \
    --cc=yang.shi@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.