From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:44115) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aH9Yb-0007uf-62 for qemu-devel@nongnu.org; Thu, 07 Jan 2016 07:20:46 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1aH9YX-0004KF-Tz for qemu-devel@nongnu.org; Thu, 07 Jan 2016 07:20:45 -0500 Received: from szxga03-in.huawei.com ([119.145.14.66]:36986) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aH9YX-0004Ji-1a for qemu-devel@nongnu.org; Thu, 07 Jan 2016 07:20:41 -0500 From: zhanghailiang Date: Thu, 7 Jan 2016 20:20:04 +0800 Message-ID: <1452169208-840-10-git-send-email-zhang.zhanghailiang@huawei.com> In-Reply-To: <1452169208-840-1-git-send-email-zhang.zhanghailiang@huawei.com> References: <1452169208-840-1-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [RFC 09/13] migration/postcopy-ram: fix some helper functions to support userfaultfd write-protect List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: aarcange@redhat.com, zhanghailiang , hanweidong@huawei.com, quintela@redhat.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, amit.shah@redhat.com Some of the postcopy helper functions will be re-used by the snapshot process, so fix these helper functions to support UFFDIO_WRITEPROTECT_MODE_WP. 
Signed-off-by: zhanghailiang --- include/migration/migration.h | 2 + include/migration/postcopy-ram.h | 2 +- linux-headers/linux/userfaultfd.h | 21 +++++++++-- migration/postcopy-ram.c | 78 ++++++++++++++++++++++++++++++--------- migration/savevm.c | 5 ++- 5 files changed, 83 insertions(+), 25 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index 1316d22..2312c73 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -87,6 +87,8 @@ struct UserfaultState { int userfault_fd; /* To tell the fault_thread to quit */ int userfault_quit_fd; + /* UFFDIO_REGISTER_MODE_MISSING or UFFDIO_REGISTER_MODE_WP*/ + int mode; }; /* State for the incoming migration */ diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index e30978f..568cbdd 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(void); * Make all of RAM sensitive to accesses to areas that haven't yet been written * and wire up anything necessary to deal with it. 
*/ -int postcopy_ram_enable_notify(UserfaultState *us); +int postcopy_ram_enable_notify(UserfaultState *us, int mode); /* * Initialise postcopy-ram, setting the RAM to a state where we can go into diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 9057d7a..1cc3f44 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -17,7 +17,7 @@ * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ * UFFD_FEATURE_EVENT_FORK) */ -#define UFFD_API_FEATURES (0) +#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -25,7 +25,8 @@ #define UFFD_API_RANGE_IOCTLS \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ - (__u64)1 << _UFFDIO_ZEROPAGE) + (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT) /* * Valid ioctl command number range with this API is from 0x00 to @@ -40,6 +41,7 @@ #define _UFFDIO_WAKE (0x02) #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_WRITEPROTECT (0x05) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -57,6 +59,9 @@ #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ struct uffdio_zeropage) +#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ + struct uffdio_writeprotect) + /* read() structure */ struct uffd_msg { __u8 event; @@ -78,7 +83,7 @@ struct uffd_msg { __u64 reserved3; } reserved; } arg; -} __packed; +} __attribute__((packed)); /* * Start at 0x12 and not at 0 to be more strict against bugs. @@ -105,8 +110,9 @@ struct uffdio_api { * are to be considered implicitly always enabled in all kernels as * long as the uffdio_api.api requested matches UFFD_API. 
*/ -#if 0 /* not available yet */ + #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) +#if 0 #define UFFD_FEATURE_EVENT_FORK (1<<1) #endif __u64 features; @@ -164,4 +170,11 @@ struct uffdio_zeropage { __s64 zeropage; }; +struct uffdio_writeprotect { + struct uffdio_range range; + /* !WP means undo writeprotect. DONTWAKE is valid only with !WP */ +#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) +#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) + __u64 mode; +}; #endif /* _LINUX_USERFAULTFD_H */ diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 38245d4..370197e 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -85,6 +85,11 @@ static bool ufd_version_check(int ufd) return false; } + if (!(api_struct.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { + error_report("Does not support write protect feature"); + return false; + } + return true; } @@ -374,6 +379,31 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis) return 0; } +static int ram_set_pages_wp(uint64_t page_addr, + uint64_t size, + bool remove, + int uffd) +{ + struct uffdio_writeprotect wp_struct; + + memset(&wp_struct, 0, sizeof(wp_struct)); + wp_struct.range.start = (uint64_t)(uintptr_t)page_addr; + wp_struct.range.len = size; + if (remove) { + wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE; + } else { + wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_WP; + } + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_struct)) { + int e = errno; + error_report("%s: %s page_addr: 0x%lx", + __func__, strerror(e), page_addr); + + return -e; + } + return 0; +} + /* * Mark the given area of RAM as requiring notification to unwritten areas * Used as a callback on qemu_ram_foreach_block. 
@@ -389,18 +419,26 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, { UserfaultState *us = opaque; struct uffdio_register reg_struct; + int ret = 0; reg_struct.range.start = (uintptr_t)host_addr; reg_struct.range.len = length; - reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + reg_struct.mode = us->mode; /* Now tell our userfault_fd that it's responsible for this area */ if (ioctl(us->userfault_fd, UFFDIO_REGISTER, &reg_struct)) { error_report("%s userfault register: %s", __func__, strerror(errno)); return -1; } + /* We need to remove the write permission for pages to enable kernel + * notify us. + */ + if (us->mode == UFFDIO_REGISTER_MODE_WP) { + ret = ram_set_pages_wp((uintptr_t)host_addr, length, false, + us->userfault_fd); + } - return 0; + return ret; } /* @@ -414,8 +452,6 @@ static void *postcopy_ram_fault_thread(void *opaque) size_t hostpagesize = getpagesize(); RAMBlock *rb = NULL; RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ - MigrationIncomingState *mis = container_of(us, MigrationIncomingState, - userfault_state); trace_postcopy_ram_fault_thread_entry(); qemu_sem_post(&us->fault_thread_sem); @@ -487,25 +523,31 @@ static void *postcopy_ram_fault_thread(void *opaque) qemu_ram_get_idstr(rb), rb_offset); - /* - * Send the request to the source - we want to request one - * of our host page sizes (which is >= TPS) - */ - if (rb != last_rb) { - last_rb = rb; - migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - rb_offset, hostpagesize); - } else { - /* Save some space */ - migrate_send_rp_req_pages(mis, NULL, - rb_offset, hostpagesize); + if (us->mode == UFFDIO_REGISTER_MODE_MISSING) { + MigrationIncomingState *mis = container_of(us, + MigrationIncomingState, + userfault_state); + + /* + * Send the request to the source - we want to request one + * of our host page sizes (which is >= TPS) + */ + if (rb != last_rb) { + last_rb = rb; + migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), + rb_offset, 
hostpagesize); + } else { + /* Save some space */ + migrate_send_rp_req_pages(mis, NULL, + rb_offset, hostpagesize); + } } } trace_postcopy_ram_fault_thread_exit(); return NULL; } -int postcopy_ram_enable_notify(UserfaultState *us) +int postcopy_ram_enable_notify(UserfaultState *us, int mode) { /* Open the fd for the kernel to give us userfaults */ us->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); @@ -514,7 +556,7 @@ int postcopy_ram_enable_notify(UserfaultState *us) strerror(errno)); return -1; } - + us->mode = mode; /* * Although the host check already tested the API, we need to * do the check again as an ABI handshake on the new fd. diff --git a/migration/savevm.c b/migration/savevm.c index a59f216..8fe5328f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -50,7 +50,7 @@ #include "qemu/iov.h" #include "block/snapshot.h" #include "block/qapi.h" - +#include <linux/userfaultfd.h> #ifndef ETH_P_RARP #define ETH_P_RARP 0x8035 @@ -1488,7 +1488,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) * However, at this point the CPU shouldn't be running, and the IO * shouldn't be doing anything yet so don't actually expect requests */ - if (postcopy_ram_enable_notify(&mis->userfault_state)) { + if (postcopy_ram_enable_notify(&mis->userfault_state, + UFFDIO_REGISTER_MODE_MISSING)) { return -1; } -- 1.8.3.1