linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Barry Song <21cnbao@gmail.com>
To: ryan.roberts@arm.com, akpm@linux-foundation.org,
	david@redhat.com, linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org, mhocko@suse.com,
	shy828301@gmail.com, wangkefeng.wang@huawei.com,
	willy@infradead.org, xiang@kernel.org, ying.huang@intel.com,
	yuzhao@google.com, surenb@google.com, steven.price@arm.com,
	Chuanhua Han <hanchuanhua@oppo.com>,
	Barry Song <v-songbaohua@oppo.com>
Subject: [PATCH RFC 6/6] mm: madvise: don't split mTHP for MADV_PAGEOUT
Date: Fri, 19 Jan 2024 00:10:36 +1300	[thread overview]
Message-ID: <20240118111036.72641-7-21cnbao@gmail.com> (raw)
In-Reply-To: <20240118111036.72641-1-21cnbao@gmail.com>

From: Chuanhua Han <hanchuanhua@oppo.com>

MADV_PAGEOUT and MADV_FREE are commonly used on Android. Ryan's patchset
added support for swapping out large folios as a whole in the vmscan case.
This patch extends that feature to madvise.

If the madvised range covers the whole large folio, we don't split it.
Otherwise, we still need to split it.

This patch doesn't depend on ARM64's CONT-PTE; instead, it defines a helper
named pte_range_cont_mapped() to check whether all PTEs are contiguously
mapped to a large folio.

Signed-off-by: Chuanhua Han <hanchuanhua@oppo.com>
Co-developed-by: Barry Song <v-songbaohua@oppo.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
---
 include/asm-generic/tlb.h | 10 +++++++
 include/linux/pgtable.h   | 60 +++++++++++++++++++++++++++++++++++++++
 mm/madvise.c              | 48 +++++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+)

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 129a3a759976..f894e22da5d6 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -608,6 +608,16 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
+/* Flush-range once for nr PTEs, then __tlb_remove_tlb_entry() on each. */
+#define tlb_remove_nr_tlb_entry(tlb, ptep, address, nr)			\
+	do {								\
+		int _i;							\
+		tlb_flush_pte_range(tlb, address, PAGE_SIZE * (nr));	\
+		for (_i = 0; _i < (nr); _i++)				\
+			__tlb_remove_tlb_entry(tlb, (ptep) + _i,	\
+				(address) + _i * PAGE_SIZE);		\
+	} while (0)
+
 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
 	do {							\
 		unsigned long _sz = huge_page_size(h);		\
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 37fe83b0c358..da0c1cf447e3 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -320,6 +320,42 @@ static inline pgd_t pgdp_get(pgd_t *pgdp)
 }
 #endif
 
+#ifndef pte_range_cont_mapped
+/*
+ * Return true if the nr PTEs from start_pte are all present and map the
+ * consecutive PFNs start_pfn, start_pfn + 1, ...; start_addr is unused.
+ */
+static inline bool pte_range_cont_mapped(unsigned long start_pfn,
+					 pte_t *start_pte,
+					 unsigned long start_addr,
+					 int nr)
+{
+	pte_t pte;
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		pte = ptep_get(start_pte + i);
+		/* A non-present PTE (none, swap, ...) has no valid PFN. */
+		if (!pte_present(pte) || pte_pfn(pte) != start_pfn + i)
+			return false;
+	}
+	return true;
+}
+#endif
+
+#ifndef pte_range_young
+/* Return true if at least one of the nr PTEs from start_pte is young. */
+static inline bool pte_range_young(pte_t *start_pte, int nr)
+{
+	pte_t *ptep;
+
+	for (ptep = start_pte; ptep < start_pte + nr; ptep++)
+		if (pte_young(ptep_get(ptep)))
+			return true;
+	return false;
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long address,
@@ -580,6 +616,37 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 }
 #endif
 
+#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_RANGE_FULL
+/*
+ * Clear nr consecutive PTEs, starting at start_pte/start_addr, by calling
+ * ptep_get_and_clear_full() on each one.
+ *
+ * Returns the first PTE's old value with the dirty and young bits of every
+ * cleared PTE folded in, so a caller that re-installs the range from the
+ * returned value does not drop a dirty or young bit set on a later PTE.
+ */
+static inline pte_t ptep_get_and_clear_range_full(struct mm_struct *mm,
+						  unsigned long start_addr,
+						  pte_t *start_pte,
+						  int nr, int full)
+{
+	pte_t pte = ptep_get_and_clear_full(mm, start_addr, start_pte, full);
+	int i;
+
+	for (i = 1; i < nr; i++) {
+		pte_t tmp = ptep_get_and_clear_full(mm,
+						    start_addr + i * PAGE_SIZE,
+						    start_pte + i, full);
+
+		if (pte_dirty(tmp))
+			pte = pte_mkdirty(pte);
+		if (pte_young(tmp))
+			pte = pte_mkyoung(pte);
+	}
+
+	return pte;
+}
+#endif
 
 /*
  * If two threads concurrently fault at the same page, the thread that
@@ -995,6 +1048,20 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 })
 #endif
 
+#ifndef pte_nr_addr_end
+/*
+ * Evaluates to the lowest multiple of size that is greater than addr, or to
+ * end if that boundary is past end. The mask arithmetic assumes size is a
+ * power of two. Subtracting 1 from both sides of the comparison makes a
+ * boundary or end of 0 compare as the largest value.
+ */
+#define pte_nr_addr_end(addr, size, end)				\
+({	unsigned long __size = (size);					\
+	unsigned long __boundary = ((addr) + __size) & ~(__size - 1);	\
+	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
+})
+#endif
+
 /*
  * When walking page tables, we usually want to skip any p?d_none entries;
  * and any p?d_bad entries - reporting the error before resetting to none.
diff --git a/mm/madvise.c b/mm/madvise.c
index 912155a94ed5..262460ac4b2e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -452,6 +452,54 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 		if (folio_test_large(folio)) {
 			int err;
 
+			if (!folio_test_pmd_mappable(folio)) {
+				int nr_pages = folio_nr_pages(folio);
+				unsigned long folio_size = PAGE_SIZE * nr_pages;
+				unsigned long start_addr = ALIGN_DOWN(addr, nr_pages * PAGE_SIZE);;
+				unsigned long start_pfn = page_to_pfn(folio_page(folio, 0));
+				pte_t *start_pte = pte - (addr - start_addr) / PAGE_SIZE;
+				unsigned long next = pte_nr_addr_end(addr, folio_size, end);
+
+				if (!pte_range_cont_mapped(start_pfn, start_pte, start_addr, nr_pages))
+					goto split;
+
+				if (next - addr != folio_size) {
+					goto split;
+				} else {
+					/* Do not interfere with other mappings of this page */
+					if (folio_estimated_sharers(folio) != 1)
+						goto skip;
+
+					VM_BUG_ON(addr != start_addr || pte != start_pte);
+
+					if (pte_range_young(start_pte, nr_pages)) {
+						ptent = ptep_get_and_clear_range_full(mm, start_addr, start_pte,
+										      nr_pages, tlb->fullmm);
+						ptent = pte_mkold(ptent);
+
+						set_ptes(mm, start_addr, start_pte, ptent, nr_pages);
+						tlb_remove_nr_tlb_entry(tlb, start_pte, start_addr, nr_pages);
+					}
+
+					folio_clear_referenced(folio);
+					folio_test_clear_young(folio);
+					if (pageout) {
+						if (folio_isolate_lru(folio)) {
+							if (folio_test_unevictable(folio))
+								folio_putback_lru(folio);
+							else
+								list_add(&folio->lru, &folio_list);
+						}
+					} else
+						folio_deactivate(folio);
+				}
+skip:
+				pte += (next - PAGE_SIZE - (addr & PAGE_MASK))/PAGE_SIZE;
+				addr = next - PAGE_SIZE;
+				continue;
+
+			}
+split:
 			if (folio_estimated_sharers(folio) != 1)
 				break;
 			if (pageout_anon_only_filter && !folio_test_anon(folio))
-- 
2.34.1



  parent reply	other threads:[~2024-01-18 11:12 UTC|newest]

Thread overview: 116+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-25 14:45 [PATCH v3 0/4] Swap-out small-sized THP without splitting Ryan Roberts
2023-10-25 14:45 ` [PATCH v3 1/4] mm: swap: Remove CLUSTER_FLAG_HUGE from swap_cluster_info:flags Ryan Roberts
2024-02-22 10:19   ` David Hildenbrand
2024-02-22 10:20     ` David Hildenbrand
2024-02-26 17:41       ` Ryan Roberts
2024-02-27 17:10         ` Ryan Roberts
2024-02-27 19:17           ` David Hildenbrand
2024-02-28  9:37             ` Ryan Roberts
2024-02-28 12:12               ` David Hildenbrand
2024-02-28 14:57                 ` Ryan Roberts
2024-02-28 15:12                   ` David Hildenbrand
2024-02-28 15:18                     ` Ryan Roberts
2024-03-01 16:27                     ` Ryan Roberts
2024-03-01 16:31                       ` Matthew Wilcox
2024-03-01 16:44                         ` Ryan Roberts
2024-03-01 17:00                           ` David Hildenbrand
2024-03-01 17:14                             ` Ryan Roberts
2024-03-01 17:18                               ` David Hildenbrand
2024-03-01 17:06                           ` Ryan Roberts
2024-03-04  4:52                             ` Barry Song
2024-03-04  5:42                               ` Barry Song
2024-03-05  7:41                                 ` Ryan Roberts
2024-03-01 16:31                       ` Ryan Roberts
2024-03-01 16:32                       ` David Hildenbrand
2024-03-04 16:03                 ` Ryan Roberts
2024-03-04 17:30                   ` David Hildenbrand
2024-03-04 18:38                     ` Ryan Roberts
2024-03-04 20:50                       ` David Hildenbrand
2024-03-04 21:55                         ` Ryan Roberts
2024-03-04 22:02                           ` David Hildenbrand
2024-03-04 22:34                             ` Ryan Roberts
2024-03-05  6:11                               ` Huang, Ying
2024-03-05  8:35                                 ` David Hildenbrand
2024-03-05  8:46                                   ` Ryan Roberts
2024-02-28 13:33               ` Matthew Wilcox
2024-02-28 14:24                 ` Ryan Roberts
2024-02-28 14:59                   ` Ryan Roberts
2023-10-25 14:45 ` [PATCH v3 2/4] mm: swap: Remove struct percpu_cluster Ryan Roberts
2023-10-25 14:45 ` [PATCH v3 3/4] mm: swap: Simplify ssd behavior when scanner steals entry Ryan Roberts
2023-10-25 14:45 ` [PATCH v3 4/4] mm: swap: Swap-out small-sized THP without splitting Ryan Roberts
2023-10-30  8:18   ` Huang, Ying
2023-10-30 13:59     ` Ryan Roberts
2023-10-31  8:12       ` Huang, Ying
2023-11-03 11:42         ` Ryan Roberts
2023-11-02  7:40   ` Barry Song
2023-11-02 10:21     ` Ryan Roberts
2023-11-02 22:36       ` Barry Song
2023-11-03 11:31         ` Ryan Roberts
2023-11-03 13:57           ` Steven Price
2023-11-04  9:34             ` Barry Song
2023-11-06 10:12               ` Steven Price
2023-11-06 21:39                 ` Barry Song
2023-11-08 11:51                   ` Steven Price
2023-11-07 12:46               ` Ryan Roberts
2023-11-07 18:05                 ` Barry Song
2023-11-08 11:23                   ` Barry Song
2023-11-08 20:20                     ` Ryan Roberts
2023-11-08 21:04                       ` Barry Song
2023-11-04  5:49           ` Barry Song
2024-02-05  9:51   ` Barry Song
2024-02-05 12:14     ` Ryan Roberts
2024-02-18 23:40       ` Barry Song
2024-02-20 20:03         ` Ryan Roberts
2024-03-05  9:00         ` Ryan Roberts
2024-03-05  9:54           ` Barry Song
2024-03-05 10:44             ` Ryan Roberts
2024-02-27 12:28     ` Ryan Roberts
2024-02-27 13:37     ` Ryan Roberts
2024-02-28  2:46       ` Barry Song
2024-02-22  7:05   ` Barry Song
2024-02-22 10:09     ` David Hildenbrand
2024-02-23  9:46       ` Barry Song
2024-02-27 12:05         ` Ryan Roberts
2024-02-28  1:23           ` Barry Song
2024-02-28  9:34             ` David Hildenbrand
2024-02-28 23:18               ` Barry Song
2024-02-28 15:57             ` Ryan Roberts
2023-11-29  7:47 ` [PATCH v3 0/4] " Barry Song
2023-11-29 12:06   ` Ryan Roberts
2023-11-29 20:38     ` Barry Song
2024-01-18 11:10 ` [PATCH RFC 0/6] mm: support large folios swap-in Barry Song
2024-01-18 11:10   ` [PATCH RFC 1/6] arm64: mm: swap: support THP_SWAP on hardware with MTE Barry Song
2024-01-26 23:14     ` Chris Li
2024-02-26  2:59       ` Barry Song
2024-01-18 11:10   ` [PATCH RFC 2/6] mm: swap: introduce swap_nr_free() for batched swap_free() Barry Song
2024-01-26 23:17     ` Chris Li
2024-02-26  4:47       ` Barry Song
2024-01-18 11:10   ` [PATCH RFC 3/6] mm: swap: make should_try_to_free_swap() support large-folio Barry Song
2024-01-26 23:22     ` Chris Li
2024-01-18 11:10   ` [PATCH RFC 4/6] mm: support large folios swapin as a whole Barry Song
2024-01-27 19:53     ` Chris Li
2024-02-26  7:29       ` Barry Song
2024-01-27 20:06     ` Chris Li
2024-02-26  7:31       ` Barry Song
2024-01-18 11:10   ` [PATCH RFC 5/6] mm: rmap: weaken the WARN_ON in __folio_add_anon_rmap() Barry Song
2024-01-18 11:54     ` David Hildenbrand
2024-01-23  6:49       ` Barry Song
2024-01-29  3:25         ` Chris Li
2024-01-29 10:06           ` David Hildenbrand
2024-01-29 16:31             ` Chris Li
2024-02-26  5:05               ` Barry Song
2024-04-06 23:27             ` Barry Song
2024-01-27 23:41     ` Chris Li
2024-01-18 11:10   ` Barry Song [this message]
2024-01-29  2:15     ` [PATCH RFC 6/6] mm: madvise: don't split mTHP for MADV_PAGEOUT Chris Li
2024-02-26  6:39       ` Barry Song
2024-02-27 12:22     ` Ryan Roberts
2024-02-27 22:39       ` Barry Song
2024-02-27 14:40     ` Ryan Roberts
2024-02-27 18:57       ` Barry Song
2024-02-28  3:49         ` Barry Song
2024-01-18 15:25   ` [PATCH RFC 0/6] mm: support large folios swap-in Ryan Roberts
2024-01-18 23:54     ` Barry Song
2024-01-19 13:25       ` Ryan Roberts
2024-01-27 14:27         ` Barry Song
2024-01-29  9:05   ` Huang, Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240118111036.72641-7-21cnbao@gmail.com \
    --to=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=hanchuanhua@oppo.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=ryan.roberts@arm.com \
    --cc=shy828301@gmail.com \
    --cc=steven.price@arm.com \
    --cc=surenb@google.com \
    --cc=v-songbaohua@oppo.com \
    --cc=wangkefeng.wang@huawei.com \
    --cc=willy@infradead.org \
    --cc=xiang@kernel.org \
    --cc=ying.huang@intel.com \
    --cc=yuzhao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).