On Wed, Feb 25, 2015 at 04:51:45PM +0000, Dr. David Alan Gilbert (git) wrote: > From: "Dr. David Alan Gilbert" > > Provide a check to see if the OS we're running on has all the bits > needed for postcopy. > > Creates postcopy-ram.c which will get most of the other helpers we need. > > Signed-off-by: Dr. David Alan Gilbert > --- > include/migration/postcopy-ram.h | 19 +++++ > migration/Makefile.objs | 2 +- > migration/postcopy-ram.c | 161 +++++++++++++++++++++++++++++++++++++++ > savevm.c | 5 ++ > 4 files changed, 186 insertions(+), 1 deletion(-) > create mode 100644 include/migration/postcopy-ram.h > create mode 100644 migration/postcopy-ram.c > > diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h > new file mode 100644 > index 0000000..d81934f > --- /dev/null > +++ b/include/migration/postcopy-ram.h > @@ -0,0 +1,19 @@ > +/* > + * Postcopy migration for RAM > + * > + * Copyright 2013 Red Hat, Inc. and/or its affiliates > + * > + * Authors: > + * Dave Gilbert > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > +#ifndef QEMU_POSTCOPY_RAM_H > +#define QEMU_POSTCOPY_RAM_H > + > +/* Return true if the host supports everything we need to do postcopy-ram */ > +bool postcopy_ram_supported_by_host(void); > + > +#endif > diff --git a/migration/Makefile.objs b/migration/Makefile.objs > index d929e96..0cac6d7 100644 > --- a/migration/Makefile.objs > +++ b/migration/Makefile.objs > @@ -1,7 +1,7 @@ > common-obj-y += migration.o tcp.o > common-obj-y += vmstate.o > common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o > -common-obj-y += xbzrle.o > +common-obj-y += xbzrle.o postcopy-ram.o > > common-obj-$(CONFIG_RDMA) += rdma.o > common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > new file mode 100644 > index 0000000..a0e20b2 > --- /dev/null > +++ b/migration/postcopy-ram.c > @@ -0,0 +1,161 @@ > +/* > + * Postcopy migration for RAM > + * > + * Copyright 2013-2014 Red Hat, Inc. and/or its affiliates > + * > + * Authors: > + * Dave Gilbert > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +/* > + * Postcopy is a migration technique where the execution flips from the > + * source to the destination before all the data has been copied. > + */ > + > +#include > +#include > +#include > + > +#include "qemu-common.h" > +#include "migration/migration.h" > +#include "migration/postcopy-ram.h" > +#include "sysemu/sysemu.h" > +#include "qemu/error-report.h" > +#include "trace.h" > + > +/* Postcopy needs to detect accesses to pages that haven't yet been copied > + * across, and efficiently map new pages in, the techniques for doing this > + * are target OS specific. > + */ > +#if defined(__linux__) > + > +#include > +#include > +#include > +#include /* for __u64 */ > +#include > + > +#ifdef HOST_X86_64 > +#ifndef __NR_userfaultfd > +#define __NR_userfaultfd 323 Sholdn't this come from the kernel headers imported in the previous patch? Rather than having an arch-specific hack. > +#endif > +#endif > + > +#endif > + > +#if defined(__linux__) && defined(__NR_userfaultfd) > + > +static bool ufd_version_check(int ufd) > +{ > + struct uffdio_api api_struct; > + uint64_t feature_mask; > + > + api_struct.api = UFFD_API; > + if (ioctl(ufd, UFFDIO_API, &api_struct)) { > + perror("postcopy_ram_supported_by_host: UFFDIO_API failed"); This should be error_report() not, perror(), to match qemu conventions, shouldn't it? > + return false; > + } > + > + feature_mask = (__u64)1 << _UFFDIO_REGISTER | > + (__u64)1 << _UFFDIO_UNREGISTER; > + if ((api_struct.ioctls & feature_mask) != feature_mask) { > + error_report("Missing userfault features: %" PRIu64, > + (uint64_t)(~api_struct.ioctls & feature_mask)); > + return false; > + } > + > + return true; > +} > + > +bool postcopy_ram_supported_by_host(void) > +{ > + long pagesize = getpagesize(); > + int ufd = -1; > + bool ret = false; /* Error unless we change it */ > + void *testarea = NULL; > + struct uffdio_register reg_struct; > + struct uffdio_range range_struct; > + uint64_t feature_mask; > + > + if ((1ul << qemu_target_page_bits()) > pagesize) { > + /* The PMI code doesn't yet deal with TPS>HPS */ > + error_report("Target page size bigger than host page size"); > + goto out; > + } > + > + ufd = syscall(__NR_userfaultfd, O_CLOEXEC); > + if (ufd == -1) { > + perror("postcopy_ram_supported_by_host: userfaultfd not available"); And here as well? And several places below. > + goto out; > + } > + > + /* Version and features check */ > + if (!ufd_version_check(ufd)) { > + goto out; > + } > + > + /* > + * We need to check that the ops we need are supported on anon memory > + * To do that we need to register a chunk and see the flags that > + * are returned. > + */ > + testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | > + MAP_ANONYMOUS, -1, 0); > + if (!testarea) { This should be (testarea == MAP_FAILED). Otherwise mmap() failures will always trip the assert below. > + perror("postcopy_ram_supported_by_host: Failed to map test area"); > + goto out; > + } > + g_assert(((size_t)testarea & (pagesize-1)) == 0); > + > + reg_struct.range.start = (uint64_t)(uintptr_t)testarea; > + reg_struct.range.len = (uint64_t)pagesize; > + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; > + > + if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { > + perror("postcopy_ram_supported_by_host userfault register"); > + goto out; > + } > + > + range_struct.start = (uint64_t)(uintptr_t)testarea; > + range_struct.len = (uint64_t)pagesize; I don't think you need the (uint64_t) casts (though you do need the uintptr_t cast). I think the assignment will do an implicit conversion without probvlems. > + if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { > + perror("postcopy_ram_supported_by_host userfault unregister"); > + goto out; > + } > + > + feature_mask = (__u64)1 << _UFFDIO_WAKE | > + (__u64)1 << _UFFDIO_COPY | > + (__u64)1 << _UFFDIO_ZEROPAGE; > + if ((reg_struct.ioctls & feature_mask) != feature_mask) { > + error_report("Missing userfault map features: %" PRIu64, I'm guessing you want PRIx64, in order to make the feature mask at least semi-readable. > + (uint64_t)(~reg_struct.ioctls & feature_mask)); > + goto out; > + } > + > + /* Success! */ > + ret = true; > +out: > + if (testarea) { > + munmap(testarea, pagesize); > + } > + if (ufd != -1) { > + close(ufd); > + } > + return ret; > +} > + > +#else > +/* No target OS support, stubs just fail */ > + > +bool postcopy_ram_supported_by_host(void) > +{ > + error_report("%s: No OS support", __func__); > + return false; > +} > + > +#endif > + > diff --git a/savevm.c b/savevm.c > index e301a0a..2ea4c76 100644 > --- a/savevm.c > +++ b/savevm.c > @@ -33,6 +33,7 @@ > #include "qemu/timer.h" > #include "audio/audio.h" > #include "migration/migration.h" > +#include "migration/postcopy-ram.h" > #include "qemu/sockets.h" > #include "qemu/queue.h" > #include "sysemu/cpus.h" > @@ -1109,6 +1110,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, > return -1; > } > > + if (!postcopy_ram_supported_by_host()) { > + return -1; > + } > + > if (remote_hps != getpagesize()) { > /* > * Some combinations of mismatch are probably possible but it gets -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson