All of lore.kernel.org
 help / color / mirror / Atom feed
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
To: qemu-devel@nongnu.org
Cc: aarcange@redhat.com,
	zhanghailiang <zhang.zhanghailiang@huawei.com>,
	hanweidong@huawei.com, quintela@redhat.com,
	peter.huangpeng@huawei.com, dgilbert@redhat.com,
	amit.shah@redhat.com
Subject: [Qemu-devel] [RFC 09/13] migration/postcopy-ram: fix some helper functions to support userfaultfd write-protect
Date: Thu, 7 Jan 2016 20:20:04 +0800	[thread overview]
Message-ID: <1452169208-840-10-git-send-email-zhang.zhanghailiang@huawei.com> (raw)
In-Reply-To: <1452169208-840-1-git-send-email-zhang.zhanghailiang@huawei.com>

We will re-use some helper functions for the snapshot process, so adapt
these helper functions to support UFFDIO_WRITEPROTECT_MODE_WP.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
---
 include/migration/migration.h     |  2 +
 include/migration/postcopy-ram.h  |  2 +-
 linux-headers/linux/userfaultfd.h | 21 +++++++++--
 migration/postcopy-ram.c          | 78 ++++++++++++++++++++++++++++++---------
 migration/savevm.c                |  5 ++-
 5 files changed, 83 insertions(+), 25 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1316d22..2312c73 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -87,6 +87,8 @@ struct UserfaultState {
     int       userfault_fd;
     /* To tell the fault_thread to quit */
     int       userfault_quit_fd;
+    /* UFFDIO_REGISTER_MODE_MISSING or UFFDIO_REGISTER_MODE_WP */
+    int       mode;
 };
 
 /* State for the incoming migration */
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index e30978f..568cbdd 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(void);
  * Make all of RAM sensitive to accesses to areas that haven't yet been written
  * and wire up anything necessary to deal with it.
  */
-int postcopy_ram_enable_notify(UserfaultState *us);
+int postcopy_ram_enable_notify(UserfaultState *us, int mode);
 
 /*
  * Initialise postcopy-ram, setting the RAM to a state where we can go into
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index 9057d7a..1cc3f44 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -17,7 +17,7 @@
  * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
  *			      UFFD_FEATURE_EVENT_FORK)
  */
-#define UFFD_API_FEATURES (0)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -25,7 +25,8 @@
 #define UFFD_API_RANGE_IOCTLS			\
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY |		\
-	 (__u64)1 << _UFFDIO_ZEROPAGE)
+     (__u64)1 << _UFFDIO_ZEROPAGE | \
+     (__u64)1 << _UFFDIO_WRITEPROTECT)
 
 /*
  * Valid ioctl command number range with this API is from 0x00 to
@@ -40,6 +41,7 @@
 #define _UFFDIO_WAKE			(0x02)
 #define _UFFDIO_COPY			(0x03)
 #define _UFFDIO_ZEROPAGE		(0x04)
+#define _UFFDIO_WRITEPROTECT    (0x05)
 #define _UFFDIO_API			(0x3F)
 
 /* userfaultfd ioctl ids */
@@ -57,6 +59,9 @@
 #define UFFDIO_ZEROPAGE		_IOWR(UFFDIO, _UFFDIO_ZEROPAGE,	\
 				      struct uffdio_zeropage)
 
+#define UFFDIO_WRITEPROTECT    _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+                     struct uffdio_writeprotect)
+
 /* read() structure */
 struct uffd_msg {
 	__u8	event;
@@ -78,7 +83,7 @@ struct uffd_msg {
 			__u64	reserved3;
 		} reserved;
 	} arg;
-} __packed;
+} __attribute__((packed));
 
 /*
  * Start at 0x12 and not at 0 to be more strict against bugs.
@@ -105,8 +110,9 @@ struct uffdio_api {
 	 * are to be considered implicitly always enabled in all kernels as
 	 * long as the uffdio_api.api requested matches UFFD_API.
 	 */
-#if 0 /* not available yet */
+
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
+#if 0
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
 #endif
 	__u64 features;
@@ -164,4 +170,11 @@ struct uffdio_zeropage {
 	__s64 zeropage;
 };
 
+struct uffdio_writeprotect {
+   struct uffdio_range range;
+   /* !WP means undo writeprotect. DONTWAKE is valid only with !WP */
+#define UFFDIO_WRITEPROTECT_MODE_WP        ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE  ((__u64)1<<1)
+   __u64 mode;
+};
 #endif /* _LINUX_USERFAULTFD_H */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 38245d4..370197e 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -85,6 +85,11 @@ static bool ufd_version_check(int ufd)
         return false;
     }
 
+    if (!(api_struct.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
+        error_report("Does not support write protect feature");
+        return false;
+    }
+
     return true;
 }
 
@@ -374,6 +379,31 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
     return 0;
 }
 
+static int ram_set_pages_wp(uint64_t page_addr,
+                            uint64_t size,
+                            bool remove,
+                            int uffd)
+{
+    struct uffdio_writeprotect wp_struct;
+
+    memset(&wp_struct, 0, sizeof(wp_struct));
+    wp_struct.range.start = (uint64_t)(uintptr_t)page_addr;
+    wp_struct.range.len = size;
+    if (remove) {
+        wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
+    } else {
+        wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+    }
+    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_struct)) {
+        int e = errno;
+        error_report("%s: %s  page_addr: 0x%lx",
+                     __func__, strerror(e), page_addr);
+
+        return -e;
+    }
+    return 0;
+}
+
 /*
  * Mark the given area of RAM as requiring notification to unwritten areas
  * Used as a  callback on qemu_ram_foreach_block.
@@ -389,18 +419,26 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
 {
     UserfaultState *us = opaque;
     struct uffdio_register reg_struct;
+    int ret = 0;
 
     reg_struct.range.start = (uintptr_t)host_addr;
     reg_struct.range.len = length;
-    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
+    reg_struct.mode = us->mode;
 
     /* Now tell our userfault_fd that it's responsible for this area */
     if (ioctl(us->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
         error_report("%s userfault register: %s", __func__, strerror(errno));
         return -1;
     }
+    /* We need to remove the write permission from the pages so that the
+     * kernel notifies us.
+     */
+    if (us->mode == UFFDIO_REGISTER_MODE_WP) {
+        ret = ram_set_pages_wp((uintptr_t)host_addr, length, false,
+                                us->userfault_fd);
+    }
 
-    return 0;
+    return ret;
 }
 
 /*
@@ -414,8 +452,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
     size_t hostpagesize = getpagesize();
     RAMBlock *rb = NULL;
     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
-    MigrationIncomingState *mis = container_of(us, MigrationIncomingState,
-                                               userfault_state);
 
     trace_postcopy_ram_fault_thread_entry();
     qemu_sem_post(&us->fault_thread_sem);
@@ -487,25 +523,31 @@ static void *postcopy_ram_fault_thread(void *opaque)
                                                 qemu_ram_get_idstr(rb),
                                                 rb_offset);
 
-        /*
-         * Send the request to the source - we want to request one
-         * of our host page sizes (which is >= TPS)
-         */
-        if (rb != last_rb) {
-            last_rb = rb;
-            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
-                                     rb_offset, hostpagesize);
-        } else {
-            /* Save some space */
-            migrate_send_rp_req_pages(mis, NULL,
-                                     rb_offset, hostpagesize);
+        if (us->mode == UFFDIO_REGISTER_MODE_MISSING) {
+            MigrationIncomingState *mis = container_of(us,
+                                                       MigrationIncomingState,
+                                                       userfault_state);
+
+            /*
+             * Send the request to the source - we want to request one
+             * of our host page sizes (which is >= TPS)
+             */
+            if (rb != last_rb) {
+                last_rb = rb;
+                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+                                          rb_offset, hostpagesize);
+            } else {
+                /* Save some space */
+                migrate_send_rp_req_pages(mis, NULL,
+                                          rb_offset, hostpagesize);
+            }
         }
     }
     trace_postcopy_ram_fault_thread_exit();
     return NULL;
 }
 
-int postcopy_ram_enable_notify(UserfaultState *us)
+int postcopy_ram_enable_notify(UserfaultState *us, int mode)
 {
     /* Open the fd for the kernel to give us userfaults */
     us->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@@ -514,7 +556,7 @@ int postcopy_ram_enable_notify(UserfaultState *us)
                      strerror(errno));
         return -1;
     }
-
+    us->mode = mode;
     /*
      * Although the host check already tested the API, we need to
      * do the check again as an ABI handshake on the new fd.
diff --git a/migration/savevm.c b/migration/savevm.c
index a59f216..8fe5328f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -50,7 +50,7 @@
 #include "qemu/iov.h"
 #include "block/snapshot.h"
 #include "block/qapi.h"
-
+#include <linux/userfaultfd.h>
 
 #ifndef ETH_P_RARP
 #define ETH_P_RARP 0x8035
@@ -1488,7 +1488,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
      * However, at this point the CPU shouldn't be running, and the IO
      * shouldn't be doing anything yet so don't actually expect requests
      */
-    if (postcopy_ram_enable_notify(&mis->userfault_state)) {
+    if (postcopy_ram_enable_notify(&mis->userfault_state,
+                                   UFFDIO_REGISTER_MODE_MISSING)) {
         return -1;
     }
 
-- 
1.8.3.1

  parent reply	other threads:[~2016-01-07 12:20 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-07 12:19 [Qemu-devel] [RFC 00/13] Live memory snapshot based on userfaultfd zhanghailiang
2016-01-07 12:19 ` [Qemu-devel] [RFC 01/13] postcopy/migration: Split fault related state into struct UserfaultState zhanghailiang
2016-01-07 12:19 ` [Qemu-devel] [RFC 02/13] migration: Allow the migrate command to work on file: urls zhanghailiang
2016-07-13 16:12   ` Dr. David Alan Gilbert
2016-07-14  5:27     ` Hailiang Zhang
2016-01-07 12:19 ` [Qemu-devel] [RFC 03/13] migration: Allow -incoming " zhanghailiang
2016-01-11 20:02   ` Dr. David Alan Gilbert
2016-01-12 13:04     ` Hailiang Zhang
2016-01-07 12:19 ` [Qemu-devel] [RFC 04/13] migration: Create a snapshot thread to realize saving memory snapshot zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 05/13] migration: implement initialization work for snapshot zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 06/13] QEMUSizedBuffer: Introduce two help functions for qsb zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 07/13] savevm: Split qemu_savevm_state_complete_precopy() into two helper functions zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 08/13] snapshot: Save VM's device state into snapshot file zhanghailiang
2016-01-07 12:20 ` zhanghailiang [this message]
2016-01-07 12:20 ` [Qemu-devel] [RFC 10/13] snapshot: Enable the write-protect notification capability for VM's RAM zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 11/13] snapshot/migration: Save VM's RAM into snapshot file zhanghailiang
2016-01-07 12:20 ` [Qemu-devel] [RFC 12/13] migration/ram: Fix some helper functions' parameter to use PageSearchStatus zhanghailiang
2016-01-11 17:55   ` Dr. David Alan Gilbert
2016-01-12 12:59     ` Hailiang Zhang
2016-01-07 12:20 ` [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage zhanghailiang
2016-07-13 17:52   ` Dr. David Alan Gilbert
2016-07-14  8:02     ` Hailiang Zhang
2016-07-04 12:22 ` [Qemu-devel] [RFC 00/13] Live memory snapshot based on userfaultfd Baptiste Reynal
2016-07-05  1:49   ` Hailiang Zhang
2016-07-05  9:57     ` Baptiste Reynal
2016-07-05 10:27       ` Hailiang Zhang
2016-08-18 15:56         ` Andrea Arcangeli
2016-08-20  6:31           ` Hailiang Zhang
2017-02-27 15:37             ` Christian Pinto
2017-02-28  1:48               ` Hailiang Zhang
2017-02-28  8:30                 ` Christian Pinto
2017-02-28 16:14                 ` Andrea Arcangeli
2017-03-01  1:08                   ` Hailiang Zhang
2017-03-09 11:34             ` [Qemu-devel] [RFC PATCH 0/4] ARM/ARM64 fixes for live " Christian Pinto
2017-03-09 11:34               ` [Qemu-devel] [RFC PATCH 1/4] migration/postcopy-ram: check pagefault flags in userfaultfd thread Christian Pinto
2017-03-09 11:34               ` [Qemu-devel] [RFC PATCH 2/4] migration/ram: Fix for ARM/ARM64 page size Christian Pinto
2017-03-09 11:34               ` [Qemu-devel] [RFC PATCH 3/4] migration: snapshot thread Christian Pinto
2017-03-09 11:34               ` [Qemu-devel] [RFC PATCH 4/4] migration/postcopy-ram: ram_set_pages_wp fix Christian Pinto
2017-03-09 17:46               ` [Qemu-devel] [RFC PATCH 0/4] ARM/ARM64 fixes for live memory snapshot based on userfaultfd Dr. David Alan Gilbert
2017-03-10  8:15                 ` Christian Pinto
2016-09-06  3:39           ` [Qemu-devel] [RFC 00/13] Live " Hailiang Zhang
2016-09-18  2:14             ` Hailiang Zhang
2016-12-08 12:45               ` Hailiang Zhang
2016-07-05 14:59       ` Andrea Arcangeli
2016-07-13 18:02 ` Dr. David Alan Gilbert
2016-07-14 10:24   ` Hailiang Zhang
2016-07-14 11:43     ` Dr. David Alan Gilbert
2016-07-19  6:53       ` Hailiang Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1452169208-840-10-git-send-email-zhang.zhanghailiang@huawei.com \
    --to=zhang.zhanghailiang@huawei.com \
    --cc=aarcange@redhat.com \
    --cc=amit.shah@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=hanweidong@huawei.com \
    --cc=peter.huangpeng@huawei.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.