linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Shaohua Li <shli@fb.com>
To: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>
Cc: <Kernel-team@fb.com>, <mhocko@suse.com>, <minchan@kernel.org>,
	<hughd@google.com>, <hannes@cmpxchg.org>, <riel@redhat.com>,
	<mgorman@techsingularity.net>, <akpm@linux-foundation.org>
Subject: [PATCH V3 3/7] mm: reclaim MADV_FREE pages
Date: Tue, 14 Feb 2017 11:36:09 -0800	[thread overview]
Message-ID: <cd6a477063c40ad899ad8f4e964c347525ea23a3.1487100204.git.shli@fb.com> (raw)
In-Reply-To: <cover.1487100204.git.shli@fb.com>

When memory pressure is high, we free MADV_FREE pages. If the pages are
not dirty in pte, the pages could be freed immediately. Otherwise we
can't reclaim them. We put the pages back on the anonymous LRU list (by
setting the SwapBacked flag) and the pages will be reclaimed in the normal
swapout way.

We use the normal page reclaim policy. Since MADV_FREE pages are put into
the inactive file list, such pages and inactive file pages are reclaimed
according to their age. This is expected, because we don't want to
reclaim too many MADV_FREE pages before used-once pages.

Based on Minchan's original patch

Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 mm/huge_memory.c |  2 ++
 mm/madvise.c     |  1 +
 mm/rmap.c        | 17 ++++++++++++-----
 mm/vmscan.c      | 30 +++++++++++++++++++++---------
 4 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4ddda58..3bb5ad5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1571,6 +1571,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		set_pmd_at(mm, addr, pmd, orig_pmd);
 		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 	}
+
+	mark_page_lazyfree(page);
 	ret = true;
 out:
 	spin_unlock(ptl);
diff --git a/mm/madvise.c b/mm/madvise.c
index 639c476..2faed38 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -412,6 +412,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 			set_pte_at(mm, addr, pte, ptent);
 			tlb_remove_tlb_entry(tlb, pte, addr);
 		}
+		mark_page_lazyfree(page);
 	}
 out:
 	if (nr_swap) {
diff --git a/mm/rmap.c b/mm/rmap.c
index af50eae..2cbdada 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1419,11 +1419,18 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			VM_BUG_ON_PAGE(!PageSwapCache(page) && PageSwapBacked(page),
 				page);
 
-			if (!PageDirty(page) && (flags & TTU_LZFREE)) {
-				/* It's a freeable page by MADV_FREE */
-				dec_mm_counter(mm, MM_ANONPAGES);
-				rp->lazyfreed++;
-				goto discard;
+			if (flags & TTU_LZFREE) {
+				if (!PageDirty(page)) {
+					/* It's a freeable page by MADV_FREE */
+					dec_mm_counter(mm, MM_ANONPAGES);
+					rp->lazyfreed++;
+					goto discard;
+				} else {
+					set_pte_at(mm, address, pvmw.pte, pteval);
+					ret = SWAP_FAIL;
+					page_vma_mapped_walk_done(&pvmw);
+					break;
+				}
 			}
 
 			if (swap_duplicate(entry) < 0) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26c3b40..435149c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -911,7 +911,7 @@ static void page_check_dirty_writeback(struct page *page,
 	 * Anonymous pages are not handled by flushers and must be written
 	 * from reclaim context. Do not stall reclaim based on them
 	 */
-	if (!page_is_file_cache(page)) {
+	if (!page_is_file_cache(page) || page_is_lazyfree(page)) {
 		*dirty = false;
 		*writeback = false;
 		return;
@@ -971,7 +971,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		int may_enter_fs;
 		enum page_references references = PAGEREF_RECLAIM_CLEAN;
 		bool dirty, writeback;
-		bool lazyfree = false;
+		bool lazyfree;
 		int ret = SWAP_SUCCESS;
 
 		cond_resched();
@@ -986,6 +986,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		sc->nr_scanned++;
 
+		lazyfree = page_is_lazyfree(page);
+
 		if (unlikely(!page_evictable(page)))
 			goto cull_mlocked;
 
@@ -993,7 +995,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep_locked;
 
 		/* Double the slab pressure for mapped and swapcache pages */
-		if (page_mapped(page) || PageSwapCache(page))
+		if ((page_mapped(page) || PageSwapCache(page)) && !lazyfree)
 			sc->nr_scanned++;
 
 		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
@@ -1119,13 +1121,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		/*
 		 * Anonymous process memory has backing store?
 		 * Try to allocate it some swap space here.
+		 * Lazyfree page could be freed directly
 		 */
-		if (PageAnon(page) && !PageSwapCache(page)) {
+		if (PageAnon(page) && !PageSwapCache(page) && !lazyfree) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
 			if (!add_to_swap(page, page_list))
 				goto activate_locked;
-			lazyfree = true;
 			may_enter_fs = 1;
 
 			/* Adding to swap updated mapping */
@@ -1142,7 +1144,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * The page is mapped into the page tables of one or more
 		 * processes. Try to unmap it here.
 		 */
-		if (page_mapped(page) && mapping) {
+		if (page_mapped(page) && (mapping || lazyfree)) {
 			switch (ret = try_to_unmap(page, lazyfree ?
 				(ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) :
 				(ttu_flags | TTU_BATCH_FLUSH))) {
@@ -1154,7 +1156,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			case SWAP_MLOCK:
 				goto cull_mlocked;
 			case SWAP_LZFREE:
-				goto lazyfree;
+				/* follow __remove_mapping for reference */
+				if (page_ref_freeze(page, 1)) {
+					if (!PageDirty(page))
+						goto lazyfree;
+					else
+						page_ref_unfreeze(page, 1);
+				}
+				goto keep_locked;
 			case SWAP_SUCCESS:
 				; /* try to free the page below */
 			}
@@ -1266,10 +1275,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 		}
 
-lazyfree:
 		if (!mapping || !__remove_mapping(mapping, page, true))
 			goto keep_locked;
-
+lazyfree:
 		/*
 		 * At this point, we have no other references and there is
 		 * no way to pick any more up (removed from LRU, removed
@@ -1294,6 +1302,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 cull_mlocked:
 		if (PageSwapCache(page))
 			try_to_free_swap(page);
+		if (lazyfree)
+			clear_page_lazyfree(page);
 		unlock_page(page);
 		list_add(&page->lru, &ret_pages);
 		continue;
@@ -1303,6 +1313,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PageSwapCache(page) && mem_cgroup_swap_full(page))
 			try_to_free_swap(page);
 		VM_BUG_ON_PAGE(PageActive(page), page);
+		if (lazyfree)
+			clear_page_lazyfree(page);
 		SetPageActive(page);
 		pgactivate++;
 keep_locked:
-- 
2.9.3

  parent reply	other threads:[~2017-02-14 19:36 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-14 19:36 [PATCH V3 0/7] mm: fix some MADV_FREE issues Shaohua Li
2017-02-14 19:36 ` [PATCH V3 1/7] mm: don't assume anonymous pages have SwapBacked flag Shaohua Li
2017-02-16 17:39   ` Johannes Weiner
2017-02-14 19:36 ` [PATCH V3 2/7] mm: move MADV_FREE pages into LRU_INACTIVE_FILE list Shaohua Li
2017-02-16 17:52   ` Johannes Weiner
2017-02-17  0:35     ` Shaohua Li
2017-02-17 16:22       ` Johannes Weiner
2017-02-14 19:36 ` Shaohua Li [this message]
2017-02-16 18:40   ` [PATCH V3 3/7] mm: reclaim MADV_FREE pages Johannes Weiner
2017-02-17  0:27     ` Shaohua Li
2017-02-17  5:45       ` Minchan Kim
2017-02-17 16:11         ` Johannes Weiner
2017-02-17 16:01       ` Johannes Weiner
2017-02-17 18:43         ` Shaohua Li
2017-02-17 20:03           ` Johannes Weiner
2017-02-17  5:41     ` Minchan Kim
2017-02-17  9:27       ` Minchan Kim
2017-02-17 16:15       ` Johannes Weiner
2017-02-14 19:36 ` [PATCH V3 4/7] mm: enable MADV_FREE for swapless system Shaohua Li
2017-02-17 16:16   ` Johannes Weiner
2017-02-14 19:36 ` [PATCH V3 5/7] mm: add vmstat account for MADV_FREE pages Shaohua Li
2017-02-14 19:36 ` [PATCH V3 6/7] proc: show MADV_FREE pages info in smaps Shaohua Li
2017-02-14 19:36 ` [PATCH V3 7/7] mm: add a separate RSS for MADV_FREE pages Shaohua Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cd6a477063c40ad899ad8f4e964c347525ea23a3.1487100204.git.shli@fb.com \
    --to=shli@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).