From: Chih-En Lin <shiyn.lin@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Qi Zheng <zhengqi.arch@bytedance.com>,
	David Hildenbrand <david@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	John Hubbard <jhubbard@nvidia.com>, Nadav Amit <namit@vmware.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Steven Rostedt <rostedt@goodmis.org>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Yang Shi <shy828301@gmail.com>, Peter Xu <peterx@redhat.com>,
	Zach O'Keefe <zokeefe@google.com>,
	"Liam R . Howlett" <Liam.Howlett@Oracle.com>,
	Alex Sierra <alex.sierra@amd.com>,
	Xianting Tian <xianting.tian@linux.alibaba.com>,
	Colin Cross <ccross@google.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Barry Song <baohua@kernel.org>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	Suleiman Souhlal <suleiman@google.com>,
	Brian Geffon <bgeffon@google.com>, Yu Zhao <yuzhao@google.com>,
	Tong Tiangen <tongtiangen@huawei.com>,
	Liu Shixin <liushixin2@huawei.com>, Li kunyu <kunyu@nfschina.com>,
	Anshuman Khandual <anshuman.khandual@arm.com>,
	Vlastimil Babka <vbabka@suse.cz>, Hugh Dickins <hughd@google.com>,
	Minchan Kim <minchan@kernel.org>,
	Miaohe Lin <linmiaohe@huawei.com>,
	Gautam Menghani <gautammenghani201@gmail.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Mark Brown <broonie@kernel.org>, Will Deacon <will@kernel.org>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Andy Lutomirski <luto@kernel.org>,
	Fenghua Yu <fenghua.yu@intel.com>,
	Barret Rhoden <brho@google.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>,
	Dinglan Peng <peng301@purdue.edu>,
	Pedro Fonseca <pfonseca@purdue.edu>,
	Jim Huang <jserv@ccns.ncku.edu.tw>,
	Huichun Feng <foxhoundsk.tw@gmail.com>,
	Chih-En Lin <shiyn.lin@gmail.com>
Subject: [PATCH v3 04/14] mm/rmap: Break COW PTE in rmap walking
Date: Tue, 20 Dec 2022 15:27:33 +0800
Message-ID: <20221220072743.3039060-5-shiyn.lin@gmail.com>
In-Reply-To: <20221220072743.3039060-1-shiyn.lin@gmail.com>

Some features (unmap, migrate, device exclusive, mkclean, etc.) may
modify PTE entries via the rmap walk. Add a new page vma mapped walk
flag, PVMW_BREAK_COW_PTE, to tell the rmap walker to break the COW-ed
PTE table before touching its entries.
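
For illustration, here is a condensed sketch of the intended usage,
pieced together from the hunks below (break_cow_pte() is the helper
introduced in patch 03/14; the walk-loop body is elided):

	/* A caller that may write to PTEs opts in with the new flag: */
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_BREAK_COW_PTE);

	while (page_vma_mapped_walk(&pvmw)) {
		/* ... modify the PTE as usual ... */
	}

	/* Inside page_vma_mapped_walk(), before map_pte(): */
	if (pvmw->flags & PVMW_BREAK_COW_PTE)
		break_cow_pte(vma, pvmw->pmd, pvmw->address);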

Signed-off-by: Chih-En Lin <shiyn.lin@gmail.com>
---
 include/linux/rmap.h |  2 ++
 mm/migrate.c         |  3 ++-
 mm/page_vma_mapped.c |  2 ++
 mm/rmap.c            | 12 +++++++-----
 mm/vmscan.c          |  7 ++++++-
 5 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bd3504d11b155..d0f07e5519736 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -368,6 +368,8 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
 #define PVMW_SYNC		(1 << 0)
 /* Look for migration entries rather than present PTEs */
 #define PVMW_MIGRATION		(1 << 1)
+/* Break COW-ed PTE during walking */
+#define PVMW_BREAK_COW_PTE	(1 << 2)
 
 struct page_vma_mapped_walk {
 	unsigned long pfn;
diff --git a/mm/migrate.c b/mm/migrate.c
index dff333593a8ae..a4be7e04c9b09 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -174,7 +174,8 @@ void putback_movable_pages(struct list_head *l)
 static bool remove_migration_pte(struct folio *folio,
 		struct vm_area_struct *vma, unsigned long addr, void *old)
 {
-	DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+	DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr,
+			      PVMW_SYNC | PVMW_MIGRATION | PVMW_BREAK_COW_PTE);
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		rmap_t rmap_flags = RMAP_NONE;
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 93e13fc17d3cb..5dfc9236dc505 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -251,6 +251,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			step_forward(pvmw, PMD_SIZE);
 			continue;
 		}
+		if (pvmw->flags & PVMW_BREAK_COW_PTE)
+			break_cow_pte(vma, pvmw->pmd, pvmw->address);
 		if (!map_pte(pvmw))
 			goto next_pte;
 this_pte:
diff --git a/mm/rmap.c b/mm/rmap.c
index 2ec925e5fa6a9..b1b7dcbd498be 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -807,7 +807,8 @@ static bool folio_referenced_one(struct folio *folio,
 		struct vm_area_struct *vma, unsigned long address, void *arg)
 {
 	struct folio_referenced_arg *pra = arg;
-	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
+	/* The walk clears the PTE's accessed bit, so break COW PTE first. */
+	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_BREAK_COW_PTE);
 	int referenced = 0;
 
 	while (page_vma_mapped_walk(&pvmw)) {
@@ -1012,7 +1013,8 @@ static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw)
 static bool page_mkclean_one(struct folio *folio, struct vm_area_struct *vma,
 			     unsigned long address, void *arg)
 {
-	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_SYNC);
+	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address,
+			      PVMW_SYNC | PVMW_BREAK_COW_PTE);
 	int *cleaned = arg;
 
 	*cleaned += page_vma_mkclean_one(&pvmw);
@@ -1471,7 +1473,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
+	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_BREAK_COW_PTE);
 	pte_t pteval;
 	struct page *subpage;
 	bool anon_exclusive, ret = true;
@@ -1842,7 +1844,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
+	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_BREAK_COW_PTE);
 	pte_t pteval;
 	struct page *subpage;
 	bool anon_exclusive, ret = true;
@@ -2195,7 +2197,7 @@ static bool page_make_device_exclusive_one(struct folio *folio,
 		struct vm_area_struct *vma, unsigned long address, void *priv)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
+	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_BREAK_COW_PTE);
 	struct make_exclusive_args *args = priv;
 	pte_t pteval;
 	struct page *subpage;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 026199c047e0e..980d2056adfd1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1781,6 +1781,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 			}
 		}
 
+		/*
+		 * Checking the folio's references may modify the
+		 * PTE, so the reference walk breaks COW PTE first.
+		 */
 		if (!ignore_references)
 			references = folio_check_references(folio, sc);
 
@@ -1864,7 +1868,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 
 		/*
 		 * The folio is mapped into the page tables of one or more
-		 * processes. Try to unmap it here.
+		 * processes. Try to unmap it here. Since unmapping writes to
+		 * the page tables, break COW PTE if the PTE table is COW-ed.
 		 */
 		if (folio_mapped(folio)) {
 			enum ttu_flags flags = TTU_BATCH_FLUSH;
-- 
2.37.3


Thread overview: 24+ messages
2022-12-20  7:27 [PATCH v3 00/14] Introduce Copy-On-Write to Page Table Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 01/14] mm: Allow user to control COW PTE via prctl Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 02/14] mm: Add Copy-On-Write PTE to fork() Chih-En Lin
2022-12-20 10:59   ` kernel test robot
2022-12-20 12:42   ` kernel test robot
2022-12-20  7:27 ` [PATCH v3 03/14] mm: Add break COW PTE fault and helper functions Chih-En Lin
2022-12-20 12:00   ` kernel test robot
2022-12-20  7:27 ` Chih-En Lin [this message]
2022-12-26  9:40   ` [PATCH v3 04/14] mm/rmap: Break COW PTE in rmap walking Barry Song
2022-12-26 10:59     ` Chih-En Lin
2022-12-27  1:15       ` Barry Song
2022-12-27  2:40         ` Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 05/14] mm/khugepaged: Break COW PTE before scanning pte Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 06/14] mm/ksm: Break COW PTE before modify shared PTE Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 07/14] mm/madvise: Handle COW-ed PTE with madvise() Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 08/14] mm/gup: Break COW PTE in follow_pfn_pte() Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 09/14] mm/mprotect: Break COW PTE before changing protection Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 10/14] mm/userfaultfd: Support COW PTE Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 11/14] mm/migrate_device: " Chih-En Lin
2022-12-20 11:50   ` kernel test robot
2022-12-20 14:23   ` kernel test robot
2022-12-20  7:27 ` [PATCH v3 12/14] fs/proc: Support COW PTE with clear_refs_write Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 13/14] events/uprobes: Break COW PTE before replacing page Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 14/14] mm: fork: Enable COW PTE to fork system call Chih-En Lin
