linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Khalid Aziz <khalid.aziz@oracle.com>
To: akpm@linux-foundation.org, willy@infradead.org
Cc: Khalid Aziz <khalid.aziz@oracle.com>,
	aneesh.kumar@linux.ibm.com, arnd@arndb.de, 21cnbao@gmail.com,
	corbet@lwn.net, dave.hansen@linux.intel.com, david@redhat.com,
	ebiederm@xmission.com, hagen@jauu.net, jack@suse.cz,
	keescook@chromium.org, kirill@shutemov.name, kucharsk@gmail.com,
	linkinjeon@kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	longpeng2@huawei.com, luto@kernel.org, markhemm@googlemail.com,
	pcc@google.com, rppt@kernel.org, sieberf@amazon.com,
	sjpark@amazon.de, surenb@google.com, tst@schoebel-theuer.de,
	yzaikin@google.com
Subject: [PATCH v1 14/14] mm/mshare: Copy PTEs to host mm
Date: Mon, 11 Apr 2022 10:05:58 -0600	[thread overview]
Message-ID: <4834724ec012da591863371cf4423d1971771d99.1649370874.git.khalid.aziz@oracle.com> (raw)
In-Reply-To: <cover.1649370874.git.khalid.aziz@oracle.com>

VMAs for shared addresses are hosted by a separate host mm. Copy over
original PTEs from the donor process to host mm so the PTEs are
maintained independent of donor process.

Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/mshare.c        | 14 +++++---------
 3 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d9456d424202..78c22891a792 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1845,6 +1845,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int
 copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
 int follow_invalidate_pte(struct mm_struct *mm, unsigned long address,
 			  struct mmu_notifier_range *range, pte_t **ptepp,
 			  pmd_t **pmdpp, spinlock_t **ptlp);
diff --git a/mm/memory.c b/mm/memory.c
index e7c5bc6f8836..9010d68f053a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1234,6 +1234,54 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	return 0;
 }
 
+/*
+ * Copy PTEs for mshare'd pages.
+ * This code is based upon copy_page_range()
+ */
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
+{
+	pgd_t *src_pgd, *dst_pgd;
+	unsigned long next;
+	unsigned long addr = src_vma->vm_start;
+	unsigned long end = src_vma->vm_end;
+	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	struct mm_struct *src_mm = src_vma->vm_mm;
+	struct mmu_notifier_range range;
+	int ret = 0;
+
+	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+				0, src_vma, src_mm, addr, end);
+	mmu_notifier_invalidate_range_start(&range);
+	/*
+	 * Disabling preemption is not needed for the write side, as
+	 * the read side doesn't spin, but goes to the mmap_lock.
+	 *
+	 * Use the raw variant of the seqcount_t write API to avoid
+	 * lockdep complaining about preemptibility.
+	 */
+	mmap_assert_write_locked(src_mm);
+	raw_write_seqcount_begin(&src_mm->write_protect_seq);
+
+	dst_pgd = pgd_offset(dst_mm, addr);
+	src_pgd = pgd_offset(src_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(src_pgd))
+			continue;
+		if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd,
+					    addr, next))) {
+			ret = -ENOMEM;
+			break;
+		}
+	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+	raw_write_seqcount_end(&src_mm->write_protect_seq);
+	mmu_notifier_invalidate_range_end(&range);
+
+	return ret;
+}
+
 int
 copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 {
diff --git a/mm/mshare.c b/mm/mshare.c
index fba31f3c190f..a399234bf106 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -385,7 +385,6 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
 			 * Copy this vma over to host mm
 			 */
 			vma->vm_private_data = info;
-			vma->vm_mm = new_mm;
 			vma->vm_flags |= VM_SHARED_PT;
 			new_vma = vm_area_dup(vma);
 			if (!new_vma) {
@@ -394,6 +393,7 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
 				err = -ENOMEM;
 				goto free_info;
 			}
+			new_vma->vm_mm = new_mm;
 			err = insert_vm_struct(new_mm, new_vma);
 			if (err) {
 				mmap_write_unlock(new_mm);
@@ -402,17 +402,13 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
 				goto free_info;
 			}
 
+			/* Copy over current PTEs */
+			err = mshare_copy_ptes(new_vma, vma);
+			if (err != 0)
+				goto free_info;
 			vma = vma->vm_next;
 		}
 
-		/*
-		 * Copy over current PTEs
-		 */
-		myaddr = addr;
-		while (myaddr < new_mm->task_size) {
-			*pgd_offset(new_mm, myaddr) = *pgd_offset(old_mm, myaddr);
-			myaddr += PGDIR_SIZE;
-		}
 		/*
 		 * TODO: Free the corresponding page table in calling
 		 * process
-- 
2.32.0



  parent reply	other threads:[~2022-04-11 16:08 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-11 16:05 [PATCH v1 00/14] Add support for shared PTEs across processes Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 01/14] mm: Add new system calls mshare, mshare_unlink Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 02/14] mm/mshare: Add msharefs filesystem Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 03/14] mm/mshare: Add read for msharefs Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 04/14] mm/mshare: implement mshare_unlink syscall Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 05/14] mm/mshare: Add locking to msharefs syscalls Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 06/14] mm/mshare: Check for mounted filesystem Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 07/14] mm/mshare: Add vm flag for shared PTE Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 08/14] mm/mshare: Add basic page table sharing using mshare Khalid Aziz
2022-04-11 18:48   ` Dave Hansen
2022-04-11 20:39     ` Khalid Aziz
2022-05-30 11:11   ` Barry Song
2022-06-28 20:11     ` Khalid Aziz
2022-05-31  3:46   ` Barry Song
2022-06-28 20:16     ` Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 09/14] mm/mshare: Do not free PTEs for mshare'd PTEs Khalid Aziz
2022-05-31  4:24   ` Barry Song
2022-06-29 17:38     ` Khalid Aziz
2022-07-03 20:54       ` Andy Lutomirski
2022-07-06 20:33         ` Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 10/14] mm/mshare: Check for mapped vma when mshare'ing existing mshare'd range Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 11/14] mm/mshare: unmap vmas in mshare_unlink Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 12/14] mm/mshare: Add a proc file with mshare alignment/size information Khalid Aziz
2022-04-11 16:05 ` [PATCH v1 13/14] mm/mshare: Enforce mshare'd region permissions Khalid Aziz
2022-04-11 16:05 ` Khalid Aziz [this message]
2022-04-11 17:37 ` [PATCH v1 00/14] Add support for shared PTEs across processes Matthew Wilcox
2022-04-11 18:51   ` Dave Hansen
2022-04-11 19:08     ` Matthew Wilcox
2022-04-11 19:52   ` Khalid Aziz
2022-04-11 18:47 ` Dave Hansen
2022-04-11 20:10 ` Eric W. Biederman
2022-04-11 22:21   ` Khalid Aziz
2022-05-30 10:48 ` Barry Song
2022-05-30 11:18   ` David Hildenbrand
2022-05-30 11:49     ` Barry Song
2022-06-29 17:48     ` Khalid Aziz
2022-06-29 17:40   ` Khalid Aziz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4834724ec012da591863371cf4423d1971771d99.1649370874.git.khalid.aziz@oracle.com \
    --to=khalid.aziz@oracle.com \
    --cc=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=arnd@arndb.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=hagen@jauu.net \
    --cc=jack@suse.cz \
    --cc=keescook@chromium.org \
    --cc=kirill@shutemov.name \
    --cc=kucharsk@gmail.com \
    --cc=linkinjeon@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longpeng2@huawei.com \
    --cc=luto@kernel.org \
    --cc=markhemm@googlemail.com \
    --cc=pcc@google.com \
    --cc=rppt@kernel.org \
    --cc=sieberf@amazon.com \
    --cc=sjpark@amazon.de \
    --cc=surenb@google.com \
    --cc=tst@schoebel-theuer.de \
    --cc=willy@infradead.org \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).