All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chih-En Lin <shiyn.lin@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Qi Zheng <zhengqi.arch@bytedance.com>,
	David Hildenbrand <david@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	John Hubbard <jhubbard@nvidia.com>, Nadav Amit <namit@vmware.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Steven Rostedt <rostedt@goodmis.org>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Yang Shi <shy828301@gmail.com>, Peter Xu <peterx@redhat.com>,
	Zach O'Keefe <zokeefe@google.com>,
	"Liam R . Howlett" <Liam.Howlett@Oracle.com>,
	Alex Sierra <alex.sierra@amd.com>,
	Xianting Tian <xianting.tian@linux.alibaba.com>,
	Colin Cross <ccross@google.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Barry Song <baohua@kernel.org>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	Suleiman Souhlal <suleiman@google.com>,
	Brian Geffon <bgeffon@google.com>, Yu Zhao <yuzhao@google.com>,
	Tong Tiangen <tongtiangen@huawei.com>,
	Liu Shixin <liushixin2@huawei.com>, Li kunyu <kunyu@nfschina.com>,
	Anshuman Khandual <anshuman.khandual@arm.com>,
	Vlastimil Babka <vbabka@suse.cz>, Hugh Dickins <hughd@google.com>,
	Minchan Kim <minchan@kernel.org>,
	Miaohe Lin <linmiaohe@huawei.com>,
	Gautam Menghani <gautammenghani201@gmail.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Mark Brown <broonie@kernel.org>, Will Deacon <will@kernel.org>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Andy Lutomirski <luto@kernel.org>,
	Fenghua Yu <fenghua.yu@intel.com>,
	Barret Rhoden <brho@google.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>,
	Dinglan Peng <peng301@purdue.edu>,
	Pedro Fonseca <pfonseca@purdue.edu>,
	Jim Huang <jserv@ccns.ncku.edu.tw>,
	Huichun Feng <foxhoundsk.tw@gmail.com>,
	Chih-En Lin <shiyn.lin@gmail.com>
Subject: [PATCH v3 01/14] mm: Allow user to control COW PTE via prctl
Date: Tue, 20 Dec 2022 15:27:30 +0800	[thread overview]
Message-ID: <20221220072743.3039060-2-shiyn.lin@gmail.com> (raw)
In-Reply-To: <20221220072743.3039060-1-shiyn.lin@gmail.com>

Add a new prctl, PR_SET_COW_PTE, to allow the user to enable COW PTE.
Since there is a time gap between using the prctl to enable COW PTE
and doing the fork, we use two states (MMF_COW_PTE_READY and MMF_COW_PTE)
to tell whether a task wants to do COW PTE or is already doing it.

The MMF_COW_PTE_READY flag marks the task to do COW PTE at the next
fork(). During fork(), if MMF_COW_PTE_READY is set, fork() will clear the
flag and set the MMF_COW_PTE flag. After that, fork() might share PTEs
instead of duplicating them.

Signed-off-by: Chih-En Lin <shiyn.lin@gmail.com>
---
 include/linux/sched/coredump.h | 12 +++++++++++-
 include/uapi/linux/prctl.h     |  6 ++++++
 kernel/sys.c                   | 11 +++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 8270ad7ae14c2..570d599ebc851 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -83,7 +83,17 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_HAS_PINNED		27	/* FOLL_PIN has run, never cleared */
 #define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
+/*
+ * MMF_COW_PTE_READY: Marks the task to do COW PTE at the next fork().
+ * During fork(), if MMF_COW_PTE_READY is set, fork() will clear the
+ * flag and set the MMF_COW_PTE flag. After that, fork() might share PTEs
+ * rather than duplicating them.
+ */
+#define MMF_COW_PTE_READY	29 /* Share PTE tables in next time of fork() */
+#define MMF_COW_PTE		30 /* PTE tables are shared between processes */
+#define MMF_COW_PTE_MASK	(1 << MMF_COW_PTE)
+
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
-				 MMF_DISABLE_THP_MASK)
+				 MMF_DISABLE_THP_MASK | MMF_COW_PTE_MASK)
 
 #endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a5e06dcbba136..664a3c0230192 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -284,4 +284,10 @@ struct prctl_mm_map {
 #define PR_SET_VMA		0x53564d41
 # define PR_SET_VMA_ANON_NAME		0
 
+/*
+ * Set the prepare flag, MMF_COW_PTE_READY, to share (copy-on-write)
+ * page tables at the next fork().
+ */
+#define PR_SET_COW_PTE			65
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 5fd54bf0e8867..d1062ea33981e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2348,6 +2348,14 @@ static int prctl_set_vma(unsigned long opt, unsigned long start,
 }
 #endif /* CONFIG_ANON_VMA_NAME */
 
+static int prctl_set_cow_pte(struct mm_struct *mm)
+{
+	if (test_bit(MMF_COW_PTE, &mm->flags))
+		return -EINVAL;
+	set_bit(MMF_COW_PTE_READY, &mm->flags);
+	return 0;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2626,6 +2634,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SET_VMA:
 		error = prctl_set_vma(arg2, arg3, arg4, arg5);
 		break;
+	case PR_SET_COW_PTE:
+		error = prctl_set_cow_pte(me->mm);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
2.37.3


  reply	other threads:[~2022-12-20  7:25 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-20  7:27 [PATCH v3 00/14] Introduce Copy-On-Write to Page Table Chih-En Lin
2022-12-20  7:27 ` Chih-En Lin [this message]
2022-12-20  7:27 ` [PATCH v3 02/14] mm: Add Copy-On-Write PTE to fork() Chih-En Lin
2022-12-20 10:59   ` kernel test robot
2022-12-20 12:42   ` kernel test robot
2022-12-20  7:27 ` [PATCH v3 03/14] mm: Add break COW PTE fault and helper functions Chih-En Lin
2022-12-20 12:00   ` kernel test robot
2022-12-20  7:27 ` [PATCH v3 04/14] mm/rmap: Break COW PTE in rmap walking Chih-En Lin
2022-12-26  9:40   ` Barry Song
2022-12-26 10:59     ` Chih-En Lin
2022-12-27  1:15       ` Barry Song
2022-12-27  2:40         ` Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 05/14] mm/khugepaged: Break COW PTE before scanning pte Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 06/14] mm/ksm: Break COW PTE before modify shared PTE Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 07/14] mm/madvise: Handle COW-ed PTE with madvise() Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 08/14] mm/gup: Break COW PTE in follow_pfn_pte() Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 09/14] mm/mprotect: Break COW PTE before changing protection Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 10/14] mm/userfaultfd: Support COW PTE Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 11/14] mm/migrate_device: " Chih-En Lin
2022-12-20 11:50   ` kernel test robot
2022-12-20 14:23   ` kernel test robot
2022-12-20  7:27 ` [PATCH v3 12/14] fs/proc: Support COW PTE with clear_refs_write Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 13/14] events/uprobes: Break COW PTE before replacing page Chih-En Lin
2022-12-20  7:27 ` [PATCH v3 14/14] mm: fork: Enable COW PTE to fork system call Chih-En Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221220072743.3039060-2-shiyn.lin@gmail.com \
    --to=shiyn.lin@gmail.com \
    --cc=Jason@zx2c4.com \
    --cc=Liam.Howlett@Oracle.com \
    --cc=acme@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=alex.sierra@amd.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=anshuman.khandual@arm.com \
    --cc=baohua@kernel.org \
    --cc=bgeffon@google.com \
    --cc=bigeasy@linutronix.de \
    --cc=brho@google.com \
    --cc=broonie@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=ccross@google.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=fenghua.yu@intel.com \
    --cc=foxhoundsk.tw@gmail.com \
    --cc=gautammenghani201@gmail.com \
    --cc=hughd@google.com \
    --cc=jhubbard@nvidia.com \
    --cc=jolsa@kernel.org \
    --cc=jserv@ccns.ncku.edu.tw \
    --cc=kunyu@nfschina.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liushixin2@huawei.com \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mhiramat@kernel.org \
    --cc=minchan@kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=namit@vmware.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=peng301@purdue.edu \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pfonseca@purdue.edu \
    --cc=rostedt@goodmis.org \
    --cc=shy828301@gmail.com \
    --cc=suleiman@google.com \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=tongtiangen@huawei.com \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=xianting.tian@linux.alibaba.com \
    --cc=yuzhao@google.com \
    --cc=zhengqi.arch@bytedance.com \
    --cc=zokeefe@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.