From: Peter Xu <peterx@redhat.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: David Hildenbrand <david@redhat.com>,
	Hugh Dickins <hughd@google.com>, Maya Gokhale <gokhale2@llnl.gov>,
	Jerome Glisse <jglisse@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	peterx@redhat.com, Martin Cracauer <cracauer@cons.org>,
	Shaohua Li <shli@fb.com>, Andrea Arcangeli <aarcange@redhat.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Denis Plotnikov <dplotnikov@virtuozzo.com>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>,
	Marty McFadden <mcfadden8@llnl.gov>, Mel Gorman <mgorman@suse.de>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>
Subject: [PATCH v3 17/28] userfaultfd: wp: support swap and page migration
Date: Wed, 20 Mar 2019 10:06:31 +0800
Message-ID: <20190320020642.4000-18-peterx@redhat.com>
In-Reply-To: <20190320020642.4000-1-peterx@redhat.com>

For both swap and page migration, we use bit 2 of the entry to
identify whether the entry is uffd write-protected.  It plays a
similar role to the existing soft dirty bit in swap entries, but it
only keeps the uffd-wp tracking for a specific PTE/PMD.
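
As context, the swap-entry helpers used in the hunks below come from
earlier patches in this series (08/28 and 16/28).  A minimal sketch of
what the x86 PTE versions might look like, assuming _PAGE_SWP_UFFD_WP
reuses bit 2 (_PAGE_USER), which carries no meaning in a non-present
entry:

	/*
	 * Sketch only -- the real definitions live in patch 08/28
	 * ("userfaultfd: wp: add WP pagetable tracking to x86").
	 */
	static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
	{
		return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
	}

	static inline int pte_swp_uffd_wp(pte_t pte)
	{
		return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
	}

	static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
	{
		return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
	}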

One thing special here is that when we want to recover the uffd-wp
bit from a swap/migration entry back into the PTE, we also need to
take care of the _PAGE_RW bit and make sure it is cleared; otherwise,
even with the _PAGE_UFFD_WP bit set, we cannot trap the write at all
(see the do_swap_page() hunk below).
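
To see why, note that the write-protect fault path hooked in patch
07/28 only runs when the hardware faults on the write in the first
place.  A sketch of that check in do_wp_page(), with the helper name
assumed from patches 07/28 and 09/28 of this series:

	/*
	 * Sketch only: this path is reached only when the PTE is
	 * write-protected, so _PAGE_RW must be cleared when the
	 * uffd-wp bit is recovered from a swap entry, or the write
	 * goes through silently and is never trapped.
	 */
	if (userfaultfd_pte_wp(vma, *vmf->pte)) {
		pte_unmap_unlock(vmf->pte, vmf->ptl);
		return handle_userfault(vmf, VM_UFFD_WP);
	}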

Note that this patch removes two lines from "userfaultfd: wp: hook
userfault handler to write protection fault", where we tried to
remove VM_FAULT_WRITE from vmf->flags when uffd-wp is set for the
VMA.  This patch keeps the write flag there.
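
End to end, the behavior this patch preserves can be illustrated with
the hedged userspace sketch below.  UFFDIO_WRITEPROTECT and struct
uffdio_writeprotect are introduced later in this series (patch 21/28),
so their exact shape here is an assumption, not a quote:

	#include <fcntl.h>
	#include <linux/userfaultfd.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		long page = sysconf(_SC_PAGESIZE);
		int uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
		struct uffdio_api api = { .api = UFFD_API };
		char *buf;

		if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
			return 1;

		buf = mmap(NULL, page, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (buf == MAP_FAILED)
			return 1;
		buf[0] = 1;	/* fault the page in first */

		struct uffdio_register reg = {
			.range = { .start = (unsigned long)buf,
				   .len = page },
			.mode  = UFFDIO_REGISTER_MODE_WP,
		};
		if (ioctl(uffd, UFFDIO_REGISTER, &reg))
			return 1;

		/* Assumed API from patch 21/28 of this series */
		struct uffdio_writeprotect wp = {
			.range = { .start = (unsigned long)buf,
				   .len = page },
			.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
		};
		if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
			return 1;

		/*
		 * With this patch, the uffd-wp bit survives the page
		 * being swapped out or migrated: a later write to
		 * buf[0] (from another thread, with this one reading
		 * the uffd) still delivers UFFD_EVENT_PAGEFAULT with
		 * UFFD_PAGEFAULT_FLAG_WP set.
		 */
		return 0;
	}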

Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/swapops.h | 2 ++
 mm/huge_memory.c        | 3 +++
 mm/memory.c             | 6 ++++++
 mm/migrate.c            | 4 ++++
 mm/mprotect.c           | 2 ++
 mm/rmap.c               | 6 ++++++
 6 files changed, 23 insertions(+)

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 4d961668e5fc..0c2923b1cdb7 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 
 	if (pte_swp_soft_dirty(pte))
 		pte = pte_swp_clear_soft_dirty(pte);
+	if (pte_swp_uffd_wp(pte))
+		pte = pte_swp_clear_uffd_wp(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fb2234cb595a..75de07141801 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2175,6 +2175,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		write = is_write_migration_entry(entry);
 		young = false;
 		soft_dirty = pmd_swp_soft_dirty(old_pmd);
+		uffd_wp = pmd_swp_uffd_wp(old_pmd);
 	} else {
 		page = pmd_page(old_pmd);
 		if (pmd_dirty(old_pmd))
@@ -2207,6 +2208,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = swp_entry_to_pte(swp_entry);
 			if (soft_dirty)
 				entry = pte_swp_mksoft_dirty(entry);
+			if (uffd_wp)
+				entry = pte_swp_mkuffd_wp(entry);
 		} else {
 			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 			entry = maybe_mkwrite(entry, vma);
diff --git a/mm/memory.c b/mm/memory.c
index 6405d56debee..c3d57fa890f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -736,6 +736,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 				pte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(*src_pte))
 					pte = pte_swp_mksoft_dirty(pte);
+				if (pte_swp_uffd_wp(*src_pte))
+					pte = pte_swp_mkuffd_wp(pte);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
 		} else if (is_device_private_entry(entry)) {
@@ -2825,6 +2827,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	flush_icache_page(vma, page);
 	if (pte_swp_soft_dirty(vmf->orig_pte))
 		pte = pte_mksoft_dirty(pte);
+	if (pte_swp_uffd_wp(vmf->orig_pte)) {
+		pte = pte_mkuffd_wp(pte);
+		pte = pte_wrprotect(pte);
+	}
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 	arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 	vmf->orig_pte = pte;
diff --git a/mm/migrate.c b/mm/migrate.c
index 181f5d2718a9..72cde187d4a1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -241,6 +241,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		entry = pte_to_swp_entry(*pvmw.pte);
 		if (is_write_migration_entry(entry))
 			pte = maybe_mkwrite(pte, vma);
+		else if (pte_swp_uffd_wp(*pvmw.pte))
+			pte = pte_mkuffd_wp(pte);
 
 		if (unlikely(is_zone_device_page(new))) {
 			if (is_device_private_page(new)) {
@@ -2301,6 +2303,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pte))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pte))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, addr, ptep, swp_pte);
 
 			/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 855dddb07ff2..96c0f521099d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -196,6 +196,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				newpte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(oldpte))
 					newpte = pte_swp_mksoft_dirty(newpte);
+				if (pte_swp_uffd_wp(oldpte))
+					newpte = pte_swp_mkuffd_wp(newpte);
 				set_pte_at(mm, addr, pte, newpte);
 
 				pages++;
diff --git a/mm/rmap.c b/mm/rmap.c
index 0454ecc29537..3750d5a5283c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1469,6 +1469,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1561,6 +1563,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1627,6 +1631,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/* Invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,
-- 
2.17.1

