From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org, mst@redhat.com,
	maxime.coquelin@redhat.com, marcandre.lureau@redhat.com,
	peterx@redhat.com, quintela@redhat.com
Cc: aarcange@redhat.com
Subject: [Qemu-devel] [PATCH v6 09/29] postcopy: Allow registering of fd handler
Date: Wed, 14 Mar 2018 11:55:58 +0000
Message-ID: <20180314115618.28831-10-dgilbert@redhat.com>
In-Reply-To: <20180314115618.28831-1-dgilbert@redhat.com>

From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Allow other userfaultfds to be registered with the fault thread so
that the handlers for shared memory regions can receive and resolve
their page faults.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
---
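For illustration (not part of the patch): a destination-side device
backend could hook into this roughly as below.  This is a minimal
sketch; the names my_fault_handler, my_dev_enable_postcopy and
dev_uffd are hypothetical, while struct PostCopyFD, pcfdhandler and
postcopy_register_shared_ufd() are the API added by this patch.

#include "migration/postcopy-ram.h"  /* struct PostCopyFD + register calls */
#include <linux/userfaultfd.h>       /* struct uffd_msg */
#include <inttypes.h>
#include <stdio.h>

/* Hypothetical handler: the fault thread calls this for each uffd_msg it
 * reads from our fd; per the pcfdhandler typedef, 'ufd' points to the
 * struct uffd_msg. */
static int my_fault_handler(struct PostCopyFD *pcfd, void *ufd)
{
    struct uffd_msg *msg = ufd;

    /* A real handler would map the faulting address to a RAMBlock/offset
     * and request that page from the migration source; this sketch only
     * logs it.  A non-zero return makes the fault thread report failure. */
    fprintf(stderr, "%s: page fault at 0x%" PRIx64 "\n", pcfd->idstr,
            (uint64_t)msg->arg.pagefault.address);
    return 0;
}

/* Hypothetical registration, run on the destination.  Note the fault
 * thread sizes its pollfd array once, before entering its loop, so this
 * must happen before that thread starts; the TODO in the loop notes that
 * deregistration during postcopy isn't handled yet. */
static void my_dev_enable_postcopy(int dev_uffd)
{
    struct PostCopyFD pcfd = {
        .fd      = dev_uffd,        /* ufd owned by the external process */
        .data    = NULL,            /* opaque pointer for the handler */
        .handler = my_fault_handler,
        .idstr   = "my-dev",        /* only used in errors and traces */
    };

    /* postcopy_register_shared_ufd() stores a copy via g_array_append_val,
     * so a stack-local struct is fine here. */
    postcopy_register_shared_ufd(&pcfd);
}
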
 migration/migration.c    |   6 ++
 migration/migration.h    |   2 +
 migration/postcopy-ram.c | 209 +++++++++++++++++++++++++++++++++++------------
 migration/postcopy-ram.h |  21 +++++
 migration/trace-events   |   2 +
 5 files changed, 187 insertions(+), 53 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 6a4780ef6f..1f22f463d3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void)
     if (!once) {
         mis_current.state = MIGRATION_STATUS_NONE;
         memset(&mis_current, 0, sizeof(MigrationIncomingState));
+        mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE,
+                                                   sizeof(struct PostCopyFD));
         qemu_mutex_init(&mis_current.rp_mutex);
         qemu_event_init(&mis_current.main_thread_load_event, false);
         once = true;
@@ -177,6 +179,10 @@ void migration_incoming_state_destroy(void)
         qemu_fclose(mis->from_src_file);
         mis->from_src_file = NULL;
     }
+    if (mis->postcopy_remote_fds) {
+        g_array_free(mis->postcopy_remote_fds, TRUE);
+        mis->postcopy_remote_fds = NULL;
+    }
 
     qemu_event_reset(&mis->main_thread_load_event);
 }
diff --git a/migration/migration.h b/migration/migration.h
index 08c5d2ded1..d02a759331 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -51,6 +51,8 @@ struct MigrationIncomingState {
     QemuMutex rp_mutex;    /* We send replies from multiple threads */
     void     *postcopy_tmp_page;
     void     *postcopy_tmp_zero_page;
+    /* PostCopyFD's for external userfaultfds & handlers of shared memory */
+    GArray   *postcopy_remote_fds;
 
     QEMUBH *bh;
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 1089814d54..6ce157741f 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -533,29 +533,44 @@ static void *postcopy_ram_fault_thread(void *opaque)
     MigrationIncomingState *mis = opaque;
     struct uffd_msg msg;
     int ret;
+    size_t index;
     RAMBlock *rb = NULL;
     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 
     trace_postcopy_ram_fault_thread_entry();
     qemu_sem_post(&mis->fault_thread_sem);
 
+    struct pollfd *pfd;
+    size_t pfd_len = 2 + mis->postcopy_remote_fds->len;
+
+    pfd = g_new0(struct pollfd, pfd_len);
+
+    pfd[0].fd = mis->userfault_fd;
+    pfd[0].events = POLLIN;
+    pfd[1].fd = mis->userfault_event_fd;
+    pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
+    trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd);
+    for (index = 0; index < mis->postcopy_remote_fds->len; index++) {
+        struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds,
+                                                 struct PostCopyFD, index);
+        pfd[2 + index].fd = pcfd->fd;
+        pfd[2 + index].events = POLLIN;
+        trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr,
+                                                  pcfd->fd);
+    }
+
     while (true) {
         ram_addr_t rb_offset;
-        struct pollfd pfd[2];
+        int poll_result;
 
         /*
          * We're mainly waiting for the kernel to give us a faulting HVA,
          * however we can be told to quit via userfault_quit_fd which is
          * an eventfd
          */
-        pfd[0].fd = mis->userfault_fd;
-        pfd[0].events = POLLIN;
-        pfd[0].revents = 0;
-        pfd[1].fd = mis->userfault_event_fd;
-        pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
-        pfd[1].revents = 0;
-
-        if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
+
+        poll_result = poll(pfd, pfd_len, -1 /* Wait forever */);
+        if (poll_result == -1) {
             error_report("%s: userfault poll: %s", __func__, strerror(errno));
             break;
         }
@@ -575,57 +590,117 @@ static void *postcopy_ram_fault_thread(void *opaque)
             }
         }
 
-        ret = read(mis->userfault_fd, &msg, sizeof(msg));
-        if (ret != sizeof(msg)) {
-            if (errno == EAGAIN) {
-                /*
-                 * if a wake up happens on the other thread just after
-                 * the poll, there is nothing to read.
-                 */
-                continue;
+        if (pfd[0].revents) {
+            poll_result--;
+            ret = read(mis->userfault_fd, &msg, sizeof(msg));
+            if (ret != sizeof(msg)) {
+                if (errno == EAGAIN) {
+                    /*
+                     * if a wake up happens on the other thread just after
+                     * the poll, there is nothing to read.
+                     */
+                    continue;
+                }
+                if (ret < 0) {
+                    error_report("%s: Failed to read full userfault "
+                                 "message: %s",
+                                 __func__, strerror(errno));
+                    break;
+                } else {
+                    error_report("%s: Read %d bytes from userfaultfd "
+                                 "expected %zd",
+                                 __func__, ret, sizeof(msg));
+                    break; /* Lost alignment, don't know what we'd read next */
+                }
             }
-            if (ret < 0) {
-                error_report("%s: Failed to read full userfault message: %s",
-                             __func__, strerror(errno));
-                break;
-            } else {
-                error_report("%s: Read %d bytes from userfaultfd expected %zd",
-                             __func__, ret, sizeof(msg));
-                break; /* Lost alignment, don't know what we'd read next */
+            if (msg.event != UFFD_EVENT_PAGEFAULT) {
+                error_report("%s: Read unexpected event %ud from userfaultfd",
+                             __func__, msg.event);
+                continue; /* It's not a page fault, shouldn't happen */
             }
-        }
-        if (msg.event != UFFD_EVENT_PAGEFAULT) {
-            error_report("%s: Read unexpected event %ud from userfaultfd",
-                         __func__, msg.event);
-            continue; /* It's not a page fault, shouldn't happen */
-        }
 
-        rb = qemu_ram_block_from_host(
-                 (void *)(uintptr_t)msg.arg.pagefault.address,
-                 true, &rb_offset);
-        if (!rb) {
-            error_report("postcopy_ram_fault_thread: Fault outside guest: %"
-                         PRIx64, (uint64_t)msg.arg.pagefault.address);
-            break;
-        }
+            rb = qemu_ram_block_from_host(
+                     (void *)(uintptr_t)msg.arg.pagefault.address,
+                     true, &rb_offset);
+            if (!rb) {
+                error_report("postcopy_ram_fault_thread: Fault outside guest: %"
+                             PRIx64, (uint64_t)msg.arg.pagefault.address);
+                break;
+            }
 
-        rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
-        trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
+            rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
+            trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                 qemu_ram_get_idstr(rb),
                                                 rb_offset);
+            /*
+             * Send the request to the source - we want to request one
+             * of our host page sizes (which is >= TPS)
+             */
+            if (rb != last_rb) {
+                last_rb = rb;
+                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+                                         rb_offset, qemu_ram_pagesize(rb));
+            } else {
+                /* Save some space */
+                migrate_send_rp_req_pages(mis, NULL,
+                                         rb_offset, qemu_ram_pagesize(rb));
+            }
+        }
 
-        /*
-         * Send the request to the source - we want to request one
-         * of our host page sizes (which is >= TPS)
-         */
-        if (rb != last_rb) {
-            last_rb = rb;
-            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
-                                     rb_offset, qemu_ram_pagesize(rb));
-        } else {
-            /* Save some space */
-            migrate_send_rp_req_pages(mis, NULL,
-                                     rb_offset, qemu_ram_pagesize(rb));
+        /* Now handle any requests from external processes on shared memory */
+        /* TODO: May need to handle devices deregistering during postcopy */
+        for (index = 2; index < pfd_len && poll_result; index++) {
+            if (pfd[index].revents) {
+                struct PostCopyFD *pcfd =
+                    &g_array_index(mis->postcopy_remote_fds,
+                                   struct PostCopyFD, index - 2);
+
+                poll_result--;
+                if (pfd[index].revents & POLLERR) {
+                    error_report("%s: POLLERR on poll %zd fd=%d",
+                                 __func__, index, pcfd->fd);
+                    pfd[index].events = 0;
+                    continue;
+                }
+
+                ret = read(pcfd->fd, &msg, sizeof(msg));
+                if (ret != sizeof(msg)) {
+                    if (errno == EAGAIN) {
+                        /*
+                         * if a wake up happens on the other thread just after
+                         * the poll, there is nothing to read.
+                         */
+                        continue;
+                    }
+                    if (ret < 0) {
+                        error_report("%s: Failed to read full userfault "
+                                     "message: %s (shared) revents=%d",
+                                     __func__, strerror(errno),
+                                     pfd[index].revents);
+                        /*TODO: Could just disable this sharer */
+                        break;
+                    } else {
+                        error_report("%s: Read %d bytes from userfaultfd "
+                                     "expected %zd (shared)",
+                                     __func__, ret, sizeof(msg));
+                        /*TODO: Could just disable this sharer */
+                        break; /*Lost alignment,don't know what we'd read next*/
+                    }
+                }
+                if (msg.event != UFFD_EVENT_PAGEFAULT) {
+                    error_report("%s: Read unexpected event %ud "
+                                 "from userfaultfd (shared)",
+                                 __func__, msg.event);
+                    continue; /* It's not a page fault, shouldn't happen */
+                }
+                /* Call the device handler registered with us */
+                ret = pcfd->handler(pcfd, &msg);
+                if (ret) {
+                    error_report("%s: Failed to resolve shared fault on %zd/%s",
+                                 __func__, index, pcfd->idstr);
+                    /* TODO: Fail? Disable this sharer? */
+                }
+            }
         }
     }
     trace_postcopy_ram_fault_thread_exit();
@@ -970,3 +1045,31 @@ PostcopyState postcopy_state_set(PostcopyState new_state)
 {
     return atomic_xchg(&incoming_postcopy_state, new_state);
 }
+
+/* Register a handler for external shared memory postcopy
+ * called on the destination.
+ */
+void postcopy_register_shared_ufd(struct PostCopyFD *pcfd)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+
+    mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds,
+                                                  *pcfd);
+}
+
+/* Unregister a handler for external shared memory postcopy
+ */
+void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
+{
+    guint i;
+    MigrationIncomingState *mis = migration_incoming_get_current();
+    GArray *pcrfds = mis->postcopy_remote_fds;
+
+    for (i = 0; i < pcrfds->len; i++) {
+        struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
+        if (cur->fd == pcfd->fd) {
+            mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i);
+            return;
+        }
+    }
+}
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 0421c98d57..f21eef6702 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -143,4 +143,25 @@ void postcopy_remove_notifier(NotifierWithReturn *n);
 /* Call the notifier list set by postcopy_add_start_notifier */
 int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp);
 
+struct PostCopyFD;
+
+/* ufd is a pointer to the struct uffd_msg *TODO: more Portable! */
+typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd);
+
+struct PostCopyFD {
+    int fd;
+    /* Data to pass to handler */
+    void *data;
+    /* Handler to be called whenever we get a poll event */
+    pcfdhandler handler;
+    /* A string to use in error messages */
+    const char *idstr;
+};
+
+/* Register a userfaultfd owned by an external process for
+ * shared memory.
+ */
+void postcopy_register_shared_ufd(struct PostCopyFD *pcfd);
+void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd);
+
 #endif
diff --git a/migration/trace-events b/migration/trace-events
index 93961dea16..1e617ad7a6 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -190,6 +190,8 @@ postcopy_place_page_zero(void *host_addr) "host=%p"
 postcopy_ram_enable_notify(void) ""
 postcopy_ram_fault_thread_entry(void) ""
 postcopy_ram_fault_thread_exit(void) ""
+postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d"
+postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d"
 postcopy_ram_fault_thread_quit(void) ""
 postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx"
 postcopy_ram_incoming_cleanup_closeuf(void) ""
-- 
2.14.3

Thread overview: 30+ messages
2018-03-14 11:55 [Qemu-devel] [PATCH v6 00/29] postcopy+vhost-user/shared ram Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 01/29] migrate: Update ram_block_discard_range for shared Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 02/29] qemu_ram_block_host_offset Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 03/29] postcopy: use UFFDIO_ZEROPAGE only when available Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 04/29] postcopy: Add notifier chain Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 05/29] postcopy: Add vhost-user flag for postcopy and check it Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 06/29] vhost-user: Add 'VHOST_USER_POSTCOPY_ADVISE' message Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 07/29] libvhost-user: Support sending fds back to qemu Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 08/29] libvhost-user: Open userfaultfd Dr. David Alan Gilbert (git)
2018-03-14 11:55 ` Dr. David Alan Gilbert (git) [this message]
2018-03-14 11:55 ` [Qemu-devel] [PATCH v6 10/29] vhost+postcopy: Register shared ufd with postcopy Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 11/29] vhost+postcopy: Transmit 'listen' to slave Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 12/29] postcopy+vhost-user: Split set_mem_table for postcopy Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 13/29] migration/ram: ramblock_recv_bitmap_test_byte_offset Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 14/29] libvhost-user+postcopy: Register new regions with the ufd Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 15/29] vhost+postcopy: Send address back to qemu Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 16/29] vhost+postcopy: Stash RAMBlock and offset Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 17/29] vhost+postcopy: Helper to send requests to source for shared pages Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 18/29] vhost+postcopy: Resolve client address Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 19/29] postcopy: helper for waking shared Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 20/29] postcopy: postcopy_notify_shared_wake Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 21/29] vhost+postcopy: Add vhost waker Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 22/29] vhost+postcopy: Call wakeups Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 23/29] libvhost-user: mprotect & madvises for postcopy Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 24/29] vhost-user: Add VHOST_USER_POSTCOPY_END message Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 25/29] vhost+postcopy: Wire up POSTCOPY_END notify Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 26/29] vhost: Huge page align and merge Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 27/29] postcopy: Allow shared memory Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 28/29] libvhost-user: Claim support for postcopy Dr. David Alan Gilbert (git)
2018-03-14 11:56 ` [Qemu-devel] [PATCH v6 29/29] postcopy shared docs Dr. David Alan Gilbert (git)
