All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Hugh Dickins <hughd@google.com>, Maya Gokhale <gokhale2@llnl.gov>,
	Jerome Glisse <jglisse@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	peterx@redhat.com, Martin Cracauer <cracauer@cons.org>,
	Denis Plotnikov <dplotnikov@virtuozzo.com>,
	Shaohua Li <shli@fb.com>, Andrea Arcangeli <aarcange@redhat.com>,
	Pavel Emelyanov <xemul@parallels.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Marty McFadden <mcfadden8@llnl.gov>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>,
	Mel Gorman <mgorman@suse.de>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>
Subject: [PATCH RFC 14/24] userfaultfd: wp: apply _PAGE_UFFD_WP bit
Date: Mon, 21 Jan 2019 15:57:12 +0800	[thread overview]
Message-ID: <20190121075722.7945-15-peterx@redhat.com> (raw)
In-Reply-To: <20190121075722.7945-1-peterx@redhat.com>

Firstly, introduce two new flags MM_CP_UFFD_WP[_RESOLVE] for
change_protection() when used with uffd-wp and make sure the two new
flags are exclusively used.  Then,

  - For MM_CP_UFFD_WP: apply the _PAGE_UFFD_WP bit and remove _PAGE_RW
    when a range of memory is write protected by uffd

  - For MM_CP_UFFD_WP_RESOLVE: remove the _PAGE_UFFD_WP bit and recover
    _PAGE_RW when write protection is resolved from userspace

And use this new interface in mwriteprotect_range() to replace the old
MM_CP_DIRTY_ACCT.

Do this change for both PTEs and huge PMDs.  Then we can start to
identify which PTE/PMD is write protected by general (e.g., COW or soft
dirty tracking), and which is for userfaultfd-wp.

Since we should keep the _PAGE_UFFD_WP when doing pte_modify(), add it
into _PAGE_CHG_MASK as well.  Meanwhile, since we have this new bit, we
can be even more strict when detecting uffd-wp page faults in either
do_wp_page() or wp_huge_pmd().

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 arch/x86/include/asm/pgtable_types.h |  2 +-
 include/linux/mm.h                   |  5 +++++
 mm/huge_memory.c                     | 14 +++++++++++++-
 mm/memory.c                          |  4 ++--
 mm/mprotect.c                        | 12 ++++++++++++
 mm/userfaultfd.c                     | 10 +++++++---
 6 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 163043ab142d..d6972b4c6abc 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -133,7 +133,7 @@
  */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
 			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |	\
-			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
+			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_UFFD_WP)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 /*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 452fcc31fa29..89345b51d8bd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1599,6 +1599,11 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 #define  MM_CP_DIRTY_ACCT                  (1UL << 0)
 /* Whether this protection change is for NUMA hints */
 #define  MM_CP_PROT_NUMA                   (1UL << 1)
+/* Whether this change is for write protecting */
+#define  MM_CP_UFFD_WP                     (1UL << 2) /* do wp */
+#define  MM_CP_UFFD_WP_RESOLVE             (1UL << 3) /* Resolve wp */
+#define  MM_CP_UFFD_WP_ALL                 (MM_CP_UFFD_WP | \
+					    MM_CP_UFFD_WP_RESOLVE)
 
 extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index be8160bb7cac..169795c8e56c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1864,6 +1864,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	bool preserve_write;
 	int ret;
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
+	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
+	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
 	ptl = __pmd_trans_huge_lock(pmd, vma);
 	if (!ptl)
@@ -1930,6 +1932,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	entry = pmd_modify(entry, newprot);
 	if (preserve_write)
 		entry = pmd_mk_savedwrite(entry);
+	if (uffd_wp) {
+		entry = pmd_wrprotect(entry);
+		entry = pmd_mkuffd_wp(entry);
+	} else if (uffd_wp_resolve) {
+		entry = pmd_mkwrite(entry);
+		entry = pmd_clear_uffd_wp(entry);
+	}
 	ret = HPAGE_PMD_NR;
 	set_pmd_at(mm, addr, pmd, entry);
 	BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
@@ -2079,7 +2088,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	struct page *page;
 	pgtable_t pgtable;
 	pmd_t old_pmd, _pmd;
-	bool young, write, soft_dirty, pmd_migration = false;
+	bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false;
 	unsigned long addr;
 	int i;
 
@@ -2161,6 +2170,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		write = pmd_write(old_pmd);
 		young = pmd_young(old_pmd);
 		soft_dirty = pmd_soft_dirty(old_pmd);
+		uffd_wp = pmd_uffd_wp(old_pmd);
 	}
 	VM_BUG_ON_PAGE(!page_count(page), page);
 	page_ref_add(page, HPAGE_PMD_NR - 1);
@@ -2194,6 +2204,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pte_mkold(entry);
 			if (soft_dirty)
 				entry = pte_mksoft_dirty(entry);
+			if (uffd_wp)
+				entry = pte_mkuffd_wp(entry);
 		}
 		pte = pte_offset_map(&_pmd, addr);
 		BUG_ON(!pte_none(*pte));
diff --git a/mm/memory.c b/mm/memory.c
index 89d51d1650e4..7f276158683b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2482,7 +2482,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
-	if (userfaultfd_wp(vma)) {
+	if (userfaultfd_pte_wp(vma, *vmf->pte)) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		return handle_userfault(vmf, VM_UFFD_WP);
 	}
@@ -3670,7 +3670,7 @@ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	if (vma_is_anonymous(vmf->vma)) {
-		if (userfaultfd_wp(vmf->vma))
+		if (userfaultfd_huge_pmd_wp(vmf->vma, orig_pmd))
 			return handle_userfault(vmf, VM_UFFD_WP);
 		return do_huge_pmd_wp_page(vmf, orig_pmd);
 	}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 416ede326c03..000e246c163b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -46,6 +46,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	int target_node = NUMA_NO_NODE;
 	bool dirty_accountable = cp_flags & MM_CP_DIRTY_ACCT;
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
+	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
+	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
 	/*
 	 * Can be called with only the mmap_sem for reading by
@@ -117,6 +119,14 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			if (preserve_write)
 				ptent = pte_mk_savedwrite(ptent);
 
+			if (uffd_wp) {
+				ptent = pte_wrprotect(ptent);
+				ptent = pte_mkuffd_wp(ptent);
+			} else if (uffd_wp_resolve) {
+				ptent = pte_mkwrite(ptent);
+				ptent = pte_clear_uffd_wp(ptent);
+			}
+
 			/* Avoid taking write faults for known dirty pages */
 			if (dirty_accountable && pte_dirty(ptent) &&
 					(pte_soft_dirty(ptent) ||
@@ -300,6 +310,8 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 {
 	unsigned long pages;
 
+	BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
+
 	if (is_vm_hugetlb_page(vma))
 		pages = hugetlb_change_protection(vma, start, end, newprot);
 	else
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 23d4bbd117ee..902247ca1474 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -73,8 +73,12 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 		goto out_release;
 
 	_dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
-	if (dst_vma->vm_flags & VM_WRITE && !wp_copy)
-		_dst_pte = pte_mkwrite(_dst_pte);
+	if (dst_vma->vm_flags & VM_WRITE) {
+		if (wp_copy)
+			_dst_pte = pte_mkuffd_wp(_dst_pte);
+		else
+			_dst_pte = pte_mkwrite(_dst_pte);
+	}
 
 	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
 	if (dst_vma->vm_file) {
@@ -674,7 +678,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 		newprot = vm_get_page_prot(dst_vma->vm_flags);
 
 	change_protection(dst_vma, start, start + len, newprot,
-			  enable_wp ? 0 : MM_CP_DIRTY_ACCT);
+			  enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
 
 	err = 0;
 out_unlock:
-- 
2.17.1


  parent reply	other threads:[~2019-01-21  7:59 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-21  7:56 [PATCH RFC 00/24] userfaultfd: write protection support Peter Xu
2019-01-21  7:56 ` [PATCH RFC 01/24] mm: gup: rename "nonblocking" to "locked" where proper Peter Xu
2019-01-21 10:20   ` Mike Rapoport
2019-01-21  7:57 ` [PATCH RFC 02/24] mm: userfault: return VM_FAULT_RETRY on signals Peter Xu
2019-01-21 15:40   ` Jerome Glisse
2019-01-22  6:10     ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 03/24] mm: allow VM_FAULT_RETRY for multiple times Peter Xu
2019-01-21 15:55   ` Jerome Glisse
2019-01-22  8:22     ` Peter Xu
2019-01-22 16:53       ` Jerome Glisse
2019-01-23  2:12         ` Peter Xu
2019-01-23  2:39           ` Jerome Glisse
2019-01-24  5:45             ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 04/24] mm: gup: " Peter Xu
2019-01-21 16:24   ` Jerome Glisse
2019-01-24  7:05     ` Peter Xu
2019-01-24 15:34       ` Jerome Glisse
2019-01-25  2:49         ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 05/24] userfaultfd: wp: add helper for writeprotect check Peter Xu
2019-01-21 10:23   ` Mike Rapoport
2019-01-22  8:31     ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 06/24] userfaultfd: wp: support write protection for userfault vma range Peter Xu
2019-01-21 10:20   ` Mike Rapoport
2019-01-22  8:55     ` Peter Xu
2019-01-21 14:05   ` Jerome Glisse
2019-01-22  9:39     ` Peter Xu
2019-01-22 17:02       ` Jerome Glisse
2019-01-23  2:17         ` Peter Xu
2019-01-23  2:43           ` Jerome Glisse
2019-01-24  5:47             ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 07/24] userfaultfd: wp: add the writeprotect API to userfaultfd ioctl Peter Xu
2019-01-21 10:42   ` Mike Rapoport
2019-01-24  4:56     ` Peter Xu
2019-01-24  7:27       ` Mike Rapoport
2019-01-24  9:28         ` Peter Xu
2019-01-25  7:54           ` Mike Rapoport
2019-01-25 10:12             ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 08/24] userfaultfd: wp: hook userfault handler to write protection fault Peter Xu
2019-01-21  7:57 ` [PATCH RFC 09/24] userfaultfd: wp: enabled write protection in userfaultfd API Peter Xu
2019-01-21  7:57 ` [PATCH RFC 10/24] userfaultfd: wp: add WP pagetable tracking to x86 Peter Xu
2019-01-21 15:09   ` Jerome Glisse
2019-01-24  5:16     ` Peter Xu
2019-01-24 15:40       ` Jerome Glisse
2019-01-25  3:30         ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 11/24] userfaultfd: wp: userfaultfd_pte/huge_pmd_wp() helpers Peter Xu
2019-01-21  7:57 ` [PATCH RFC 12/24] userfaultfd: wp: add UFFDIO_COPY_MODE_WP Peter Xu
2019-01-21  7:57 ` [PATCH RFC 13/24] mm: merge parameters for change_protection() Peter Xu
2019-01-21 13:54   ` Jerome Glisse
2019-01-24  5:22     ` Peter Xu
2019-01-21  7:57 ` Peter Xu [this message]
2019-01-21  7:57 ` [PATCH RFC 15/24] mm: export wp_page_copy() Peter Xu
2019-01-21  7:57 ` [PATCH RFC 16/24] userfaultfd: wp: handle COW properly for uffd-wp Peter Xu
2019-01-21  7:57 ` [PATCH RFC 17/24] userfaultfd: wp: drop _PAGE_UFFD_WP properly when fork Peter Xu
2019-01-21  7:57 ` [PATCH RFC 18/24] userfaultfd: wp: add pmd_swp_*uffd_wp() helpers Peter Xu
2019-01-21  7:57 ` [PATCH RFC 19/24] userfaultfd: wp: support swap and page migration Peter Xu
2019-01-21  7:57 ` [PATCH RFC 20/24] userfaultfd: wp: don't wake up when doing write protect Peter Xu
2019-01-21 11:10   ` Mike Rapoport
2019-01-24  5:36     ` Peter Xu
2019-01-21  7:57 ` [PATCH RFC 21/24] khugepaged: skip collapse if uffd-wp detected Peter Xu
2019-01-21  7:57 ` [PATCH RFC 22/24] userfaultfd: wp: UFFDIO_REGISTER_MODE_WP documentation update Peter Xu
2019-01-21  7:57 ` [PATCH RFC 23/24] userfaultfd: selftests: refactor statistics Peter Xu
2019-01-21  7:57 ` [PATCH RFC 24/24] userfaultfd: selftests: add write-protect test Peter Xu
2019-01-21 14:33 ` [PATCH RFC 00/24] userfaultfd: write protection support David Hildenbrand
2019-01-22  3:18   ` Peter Xu
2019-01-22  8:59     ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190121075722.7945-15-peterx@redhat.com \
    --to=peterx@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=cracauer@cons.org \
    --cc=dgilbert@redhat.com \
    --cc=dplotnikov@virtuozzo.com \
    --cc=gokhale2@llnl.gov \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jglisse@redhat.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mcfadden8@llnl.gov \
    --cc=mgorman@suse.de \
    --cc=mike.kravetz@oracle.com \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=shli@fb.com \
    --cc=xemul@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.