All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] mm/userfaultfd: propagate uffd-wp bit when PTE-mapping the huge zeropage
@ 2023-03-02 17:54 David Hildenbrand
  2023-03-02 22:29 ` Peter Xu
  2023-03-03  1:57 ` Andrew Morton
  0 siblings, 2 replies; 7+ messages in thread
From: David Hildenbrand @ 2023-03-02 17:54 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mm, David Hildenbrand, Andrew Morton, Mike Rapoport,
	Andrea Arcangeli, Peter Xu, Jerome Glisse, Shaohua Li

Currently, we'd lose the userfaultfd-wp marker when PTE-mapping a huge
zeropage, resulting in the next write faults in the PMD range
not triggering uffd-wp events.

Various actions (partial MADV_DONTNEED, partial mremap, partial munmap,
partial mprotect) could trigger this. However, most importantly,
un-protecting a single sub-page from the userfaultfd-wp handler when
processing a uffd-wp event will PTE-map the shared huge zeropage and
lose the uffd-wp bit for the remainder of the PMD.

Let's properly propagate the uffd-wp bit from the PMD to the PTEs.

---
 #define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include <poll.h>
 #include <pthread.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <linux/userfaultfd.h>

 static size_t pagesize;
 static int uffd;
 static volatile bool uffd_triggered;

 #define barrier() __asm__ __volatile__("": : :"memory")

 /*
  * Set (wp == true) or clear (wp == false) userfaultfd write protection on
  * the range [start, start + size) by issuing UFFDIO_WRITEPROTECT on the
  * file-level uffd descriptor. Prints errno and exits with status 1 if the
  * ioctl fails.
  */
 static void uffd_wp_range(char *start, size_t size, bool wp)
 {
 	struct uffdio_writeprotect args = {
 		.range = {
 			.start = (unsigned long) start,
 			.len = size,
 		},
 		.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0,
 	};

 	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &args) == 0)
 		return;

 	fprintf(stderr, "UFFDIO_WRITEPROTECT failed: %d\n", errno);
 	exit(1);
 }

 /*
  * Thread body that services events on the file-level uffd descriptor.
  *
  * Loops forever: waits for the descriptor to become readable, reads one
  * message, and requires it to be a write-protect page fault (anything else
  * prints a FAIL line and exits). For a valid event it records the fact in
  * uffd_triggered and then removes write protection from one page at the
  * faulting address. Reads that return nothing (<= 0) are skipped and
  * polling resumes.
  */
 static void *uffd_thread_fn(void *arg)
 {
 	static struct uffd_msg msg;

 	for (;;) {
 		struct pollfd pfd = { .fd = uffd, .events = POLLIN };
 		ssize_t len;
 		bool is_wp_fault;

 		if (poll(&pfd, 1, -1) == -1) {
 			fprintf(stderr, "poll() failed: %d\n", errno);
 			exit(1);
 		}

 		len = read(uffd, &msg, sizeof(msg));
 		if (len <= 0)
 			continue;

 		is_wp_fault = msg.event == UFFD_EVENT_PAGEFAULT &&
 			      (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP);
 		if (!is_wp_fault) {
 			printf("FAIL: wrong uffd-wp event fired\n");
 			exit(1);
 		}

 		/* Flag the event first, then un-protect just the one page. */
 		uffd_triggered = true;
 		uffd_wp_range((char *)(uintptr_t)msg.arg.pagefault.address,
 			      pagesize, false);
 	}
 	return arg;
 }

 /*
  * Create the userfaultfd, negotiate the API with the write-protect
  * page-fault feature, register [map, map + size) in write-protect mode and
  * start the thread that handles the resulting events.
  *
  * The descriptor is stored in the file-level uffd variable.
  *
  * Returns 0 on success and a negative error number on failure.
  */
 static int setup_uffd(char *map, size_t size)
 {
 	struct uffdio_api uffdio_api;
 	struct uffdio_register uffdio_register;
 	pthread_t thread;
 	int err;

 	uffd = syscall(__NR_userfaultfd,
 		       O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
 	if (uffd < 0) {
 		/* Save errno first: fprintf() is allowed to overwrite it. */
 		err = errno;
 		fprintf(stderr, "syscall() failed: %d\n", err);
 		return -err;
 	}

 	uffdio_api.api = UFFD_API;
 	uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
 		err = errno;
 		fprintf(stderr, "UFFDIO_API failed: %d\n", err);
 		return -err;
 	}

 	/* The kernel reports back the features it actually provides. */
 	if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
 		fprintf(stderr, "UFFD_FEATURE_PAGEFAULT_FLAG_WP missing\n");
 		return -ENOSYS;
 	}

 	uffdio_register.range.start = (unsigned long) map;
 	uffdio_register.range.len = size;
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) {
 		err = errno;
 		fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", err);
 		return -err;
 	}

 	/*
 	 * pthread_create() returns the error number directly instead of
 	 * setting errno. Without the handler thread nobody would resolve the
 	 * write faults, so treat a failure here as fatal for the setup.
 	 */
 	err = pthread_create(&thread, NULL, uffd_thread_fn, NULL);
 	if (err) {
 		fprintf(stderr, "pthread_create() failed: %d\n", err);
 		return -err;
 	}

 	return 0;
 }

 /*
  * Reproducer: map 4 MiB of anonymous memory with MADV_HUGEPAGE, register it
  * with userfaultfd in write-protect mode, populate it by reading,
  * write-protect the whole range and then write to every page, expecting the
  * handler thread to see one uffd-wp event per page.
  *
  * Prints "PASS: ..." and returns 0 when every write triggered an event;
  * prints "FAIL: ..." and returns 1 on the first write that did not. Setup
  * failures return a non-zero status.
  */
 int main(void)
 {
 	const size_t size = 4 * 1024 * 1024ull;
 	char *map, *cur;
 	int err;

 	pagesize = getpagesize();

 	map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
 	if (map == MAP_FAILED) {
 		/* Save errno first: fprintf() is allowed to overwrite it. */
 		err = errno;
 		fprintf(stderr, "mmap() failed\n");
 		return -err;
 	}

 	if (madvise(map, size, MADV_HUGEPAGE)) {
 		err = errno;
 		fprintf(stderr, "MADV_HUGEPAGE failed\n");
 		return -err;
 	}

 	if (setup_uffd(map, size))
 		return 1;

 	/*
 	 * Read the whole range, populating zeropages. If this fails, the
 	 * precondition for the checks below is not established, so bail out
 	 * instead of reporting a misleading result.
 	 */
 	if (madvise(map, size, MADV_POPULATE_READ)) {
 		err = errno;
 		fprintf(stderr, "MADV_POPULATE_READ failed\n");
 		return -err;
 	}

 	/* Write-protect the whole range. */
 	uffd_wp_range(map, size, true);

 	/* Make sure uffd-wp triggers on each page. */
 	for (cur = map; cur < map + size; cur += pagesize) {
 		uffd_triggered = false;

 		/* Keep the compiler from moving the store across the flag accesses. */
 		barrier();
 		/* Trigger a write fault. */
 		*cur = 1;
 		barrier();

 		if (!uffd_triggered) {
 			printf("FAIL: uffd-wp did not trigger\n");
 			return 1;
 		}
 	}

 	printf("PASS: uffd-wp triggered\n");
 	return 0;
 }
---

Fixes: e06f1e1dd499 ("userfaultfd: wp: enabled write protection in userfaultfd API")
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Shaohua Li <shli@fb.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 mm/huge_memory.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4fc43859e59a..032fb0ef9cd1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2037,7 +2037,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgtable_t pgtable;
-	pmd_t _pmd;
+	pmd_t _pmd, old_pmd;
 	int i;
 
 	/*
@@ -2048,7 +2048,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 	 *
 	 * See Documentation/mm/mmu_notifier.rst
 	 */
-	pmdp_huge_clear_flush(vma, haddr, pmd);
+	old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
 
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pmd_populate(mm, &_pmd, pgtable);
@@ -2057,6 +2057,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 		pte_t *pte, entry;
 		entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
 		entry = pte_mkspecial(entry);
+		if (pmd_uffd_wp(old_pmd))
+			entry = pte_mkuffd_wp(entry);
 		pte = pte_offset_map(&_pmd, haddr);
 		VM_BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, haddr, pte, entry);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread
* Re: [PATCH v1] mm/userfaultfd: propagate uffd-wp bit when PTE-mapping the huge zeropage
@ 2023-03-03  1:11 kernel test robot
  0 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-03-03  1:11 UTC (permalink / raw)
  Cc: oe-kbuild-all, llvm

In-Reply-To: <20230302175423.589164-1-david@redhat.com>
References: <20230302175423.589164-1-david@redhat.com>
TO: David Hildenbrand <david@redhat.com>
TO: linux-kernel@vger.kernel.org
CC: linux-mm@kvack.org
CC: David Hildenbrand <david@redhat.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linux Memory Management List <linux-mm@kvack.org>
CC: Mike Rapoport <rppt@linux.vnet.ibm.com>
CC: Andrea Arcangeli <aarcange@redhat.com>
CC: Peter Xu <peterx@redhat.com>
CC: Jerome Glisse <jglisse@redhat.com>
CC: Shaohua Li <shli@fb.com>

Hi David,

I love your patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/David-Hildenbrand/mm-userfaultfd-propagate-uffd-wp-bit-when-PTE-mapping-the-huge-zeropage/20230303-015516
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20230302175423.589164-1-david%40redhat.com
patch subject: [PATCH v1] mm/userfaultfd: propagate uffd-wp bit when PTE-mapping the huge zeropage
config: riscv-randconfig-r002-20230303 (https://download.01.org/0day-ci/archive/20230303/202303030949.aMmf1z6R-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project 67409911353323ca5edf2049ef0df54132fa1ca7)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/acd586e505b2e97daab12be8f968f5209b6d8e67
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review David-Hildenbrand/mm-userfaultfd-propagate-uffd-wp-bit-when-PTE-mapping-the-huge-zeropage/20230303-015516
        git checkout acd586e505b2e97daab12be8f968f5209b6d8e67
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030949.aMmf1z6R-lkp@intel.com/

All errors (new ones prefixed by >>):

>> ld.lld: error: vmlinux.a(fs/ext4/xattr_hurd.o):(function ext4_xattr_hurd_get: .text+0x94): relocation R_RISCV_HI20 out of range: 541483 is not in [-524288, 524287]; references ext4_xattr_hurd_get.__if_trace.1
   >>> referenced by xattr_hurd.c:18 (fs/ext4/xattr_hurd.c:18)
   >>> defined in vmlinux.a(fs/ext4/xattr_hurd.o)
--
>> ld.lld: error: vmlinux.a(drivers/gpu/drm/arm/display/komeda/komeda_format_caps.o):(function komeda_get_format_caps: .text+0x68): relocation R_RISCV_HI20 out of range: 541810 is not in [-524288, 524287]; references komeda_get_format_caps.__if_trace.2.0
   >>> referenced by komeda_format_caps.c:15 (drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c:15)
   >>> defined in vmlinux.a(drivers/gpu/drm/arm/display/komeda/komeda_format_caps.o)
--
>> ld.lld: error: vmlinux.a(mm/mempool.o):(function remove_element: .text+0x134): relocation R_RISCV_HI20 out of range: 541399 is not in [-524288, 524287]; references kasan_unpoison_element.__if_trace.44.0
   >>> referenced by mempool.c:160 (mm/mempool.c:160)
   >>> defined in vmlinux.a(mm/mempool.o)
--
>> ld.lld: error: vmlinux.a(mm/mempool.o):(function remove_element: .text+0x158): relocation R_RISCV_HI20 out of range: 525325 is not in [-524288, 524287]; references mempool_alloc_pages
   >>> referenced by mempool.c:160 (mm/mempool.c:160)
   >>> defined in vmlinux.a(mm/mempool.o)
--
>> ld.lld: error: vmlinux.a(mm/mempool.o):(function remove_element: .text+0x18c): relocation R_RISCV_HI20 out of range: 541399 is not in [-524288, 524287]; references kasan_unpoison_element.__if_trace.1
   >>> referenced by mempool.c:158 (mm/mempool.c:158)
   >>> defined in vmlinux.a(mm/mempool.o)
--
>> ld.lld: error: vmlinux.a(mm/mempool.o):(function remove_element: .text+0x22c): relocation R_RISCV_HI20 out of range: 541399 is not in [-524288, 524287]; references remove_element.__if_trace.1
   >>> referenced by mempool.c:162 (mm/mempool.c:162)
   >>> defined in vmlinux.a(mm/mempool.o)
--
>> ld.lld: error: vmlinux.a(fs/ext4/xattr_hurd.o):(function ext4_xattr_hurd_set: .text+0x190): relocation R_RISCV_HI20 out of range: 541483 is not in [-524288, 524287]; references ext4_xattr_hurd_set.__if_trace.1
   >>> referenced by xattr_hurd.c:25 (fs/ext4/xattr_hurd.c:25)
   >>> defined in vmlinux.a(fs/ext4/xattr_hurd.o)
--
>> ld.lld: error: vmlinux.a(mm/mempool.o):(function remove_element: .text+0xac): relocation R_RISCV_HI20 out of range: 540890 is not in [-524288, 524287]; references remove_element.______f.42
   >>> referenced by mempool.c:159 (mm/mempool.c:159)
   >>> defined in vmlinux.a(mm/mempool.o)
--
>> ld.lld: error: vmlinux.a(mm/mempool.o):(function remove_element: .text+0xf4): relocation R_RISCV_HI20 out of range: 525325 is not in [-524288, 524287]; references mempool_kmalloc
   >>> referenced by mempool.c:160 (mm/mempool.c:160)
   >>> defined in vmlinux.a(mm/mempool.o)
--
>> ld.lld: error: vmlinux.a(mm/mempool.o):(function remove_element: .text+0x124): relocation R_RISCV_HI20 out of range: 525325 is not in [-524288, 524287]; references mempool_alloc_slab
   >>> referenced by mempool.c:160 (mm/mempool.c:160)
   >>> defined in vmlinux.a(mm/mempool.o)

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-03-03 14:33 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-02 17:54 [PATCH v1] mm/userfaultfd: propagate uffd-wp bit when PTE-mapping the huge zeropage David Hildenbrand
2023-03-02 22:29 ` Peter Xu
2023-03-03  9:12   ` David Hildenbrand
2023-03-03 14:32     ` Peter Xu
2023-03-03  1:57 ` Andrew Morton
2023-03-03  9:12   ` David Hildenbrand
2023-03-03  1:11 kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.