From: Tamas K Lengyel <tamas.lengyel@zentific.com>
To: xen-devel@lists.xen.org
Cc: kevin.tian@intel.com, wei.liu2@citrix.com,
	ian.campbell@citrix.com, steve@zentific.com,
	stefano.stabellini@eu.citrix.com, jun.nakajima@intel.com,
	tim@xen.org, ian.jackson@eu.citrix.com, eddie.dong@intel.com,
	andres@lagarcavilla.org, jbeulich@suse.com,
	Tamas K Lengyel <tamas.lengyel@zentific.com>,
	rshriram@cs.ubc.ca, keir@xen.org, dgdegra@tycho.nsa.gov,
	yanghy@cn.fujitsu.com, rcojocaru@bitdefender.com
Subject: [RFC PATCH V3 05/12] xen: Introduce vm_event
Date: Thu, 29 Jan 2015 22:46:31 +0100
Message-ID: <1422567998-29995-6-git-send-email-tamas.lengyel@zentific.com>
In-Reply-To: <1422567998-29995-1-git-send-email-tamas.lengyel@zentific.com>

To make the mem_event -> vm_event rename easier to review, the process is
broken into three pieces, of which this patch is the first. In this patch
the vm_event subsystem is introduced and hooked into the build process, but
it is not yet used anywhere.
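
As an illustration of where this is headed (not part of this patch, and the
wiring only lands later in the series), a toolstack helper would set up a
monitor ring roughly as follows. Note that xc_vm_event_enable() is
libxc-internal (xc_private.h), so this is a sketch rather than a public API,
and the xch/domain_id variables are assumed from the caller's context:

    uint32_t port;
    void *ring_page;

    /* Hypothetical call site: enable the monitor ring, no introspection. */
    ring_page = xc_vm_event_enable(xch, domain_id,
                                   HVM_PARAM_MONITOR_RING_PFN, &port, 0);
    if ( ring_page == NULL )
        PERROR("Failed to enable vm_event");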

Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
---
 MAINTAINERS                         |   1 +
 docs/misc/xsm-flask.txt             |   1 +
 tools/libxc/Makefile                |   1 +
 tools/libxc/xc_private.h            |  12 +
 tools/libxc/xc_vm_event.c           | 162 ++++++++
 xen/common/Makefile                 |   1 +
 xen/common/vm_event.c               | 739 ++++++++++++++++++++++++++++++++++++
 xen/include/public/domctl.h         |  79 ++++
 xen/include/public/vm_event.h       | 193 ++++++++++
 xen/include/xen/sched.h             |  41 ++
 xen/include/xen/vm_event.h          | 143 +++++++
 xen/include/xsm/dummy.h             |  12 +
 xen/include/xsm/xsm.h               |  12 +
 xen/xsm/dummy.c                     |   2 +
 xen/xsm/flask/hooks.c               |  12 +
 xen/xsm/flask/policy/access_vectors |   1 +
 16 files changed, 1412 insertions(+)
 create mode 100644 tools/libxc/xc_vm_event.c
 create mode 100644 xen/common/vm_event.c
 create mode 100644 xen/include/public/vm_event.h
 create mode 100644 xen/include/xen/vm_event.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3bbac9e..cff3e5f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -364,6 +364,7 @@ F:	tools/memshr
 MEMORY EVENT AND ACCESS
 M:	Tim Deegan <tim@xen.org>
 S:	Supported
+F:	xen/common/vm_event.c
 F:	xen/common/mem_event.c
 F:	xen/common/mem_access.c
 
diff --git a/docs/misc/xsm-flask.txt b/docs/misc/xsm-flask.txt
index 9559028..9eead61 100644
--- a/docs/misc/xsm-flask.txt
+++ b/docs/misc/xsm-flask.txt
@@ -87,6 +87,7 @@ __HYPERVISOR_domctl (xen/include/public/domctl.h)
  * XEN_DOMCTL_set_machine_address_size
  * XEN_DOMCTL_debug_op
  * XEN_DOMCTL_gethvmcontext_partial
+ * XEN_DOMCTL_vm_event_op
  * XEN_DOMCTL_mem_event_op
  * XEN_DOMCTL_mem_sharing_op
  * XEN_DOMCTL_setvcpuextstate
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index bd2ca6c..b97e535 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -26,6 +26,7 @@ CTRL_SRCS-y       += xc_pm.c
 CTRL_SRCS-y       += xc_cpu_hotplug.c
 CTRL_SRCS-y       += xc_resume.c
 CTRL_SRCS-y       += xc_tmem.c
+CTRL_SRCS-y       += xc_vm_event.c
 CTRL_SRCS-y       += xc_mem_event.c
 CTRL_SRCS-y       += xc_mem_paging.c
 CTRL_SRCS-y       += xc_mem_access.c
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index f1f601c..58db86d 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -432,4 +432,16 @@ int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
 void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
                           uint32_t *port, int enable_introspection);
 
+/**
+ * vm_event operations. Internal use only.
+ */
+int xc_vm_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
+                        unsigned int mode, uint32_t *port);
+/*
+ * Enables vm_event and returns the mapped ring page indicated by param.
+ * param can be HVM_PARAM_PAGING/MONITOR/SHARING_RING_PFN
+ */
+void *xc_vm_event_enable(xc_interface *xch, domid_t domain_id, int param,
+                         uint32_t *port, int enable_introspection);
+
 #endif /* __XC_PRIVATE_H__ */
diff --git a/tools/libxc/xc_vm_event.c b/tools/libxc/xc_vm_event.c
new file mode 100644
index 0000000..dda766e
--- /dev/null
+++ b/tools/libxc/xc_vm_event.c
@@ -0,0 +1,162 @@
+/******************************************************************************
+ *
+ * xc_vm_event.c
+ *
+ * Interface to low-level VM event functionality.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xc_private.h"
+
+int xc_vm_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
+                         unsigned int mode, uint32_t *port)
+{
+    DECLARE_DOMCTL;
+    int rc;
+
+    /*
+     * No separate domctl number is introduced yet; reuse the mem_event
+     * domctl for now -- this subsystem is not wired up anywhere until the
+     * follow-up patches in this series.
+     */
+    domctl.cmd = XEN_DOMCTL_mem_event_op;
+    domctl.domain = domain_id;
+    domctl.u.vm_event_op.op = op;
+    domctl.u.vm_event_op.mode = mode;
+
+    rc = do_domctl(xch, &domctl);
+    if ( !rc && port )
+        *port = domctl.u.vm_event_op.port;
+    return rc;
+}
+
+void *xc_vm_event_enable(xc_interface *xch, domid_t domain_id, int param,
+                          uint32_t *port, int enable_introspection)
+{
+    void *ring_page = NULL;
+    uint64_t pfn;
+    xen_pfn_t ring_pfn, mmap_pfn;
+    unsigned int op, mode;
+    int rc1, rc2, saved_errno;
+
+    if ( !port )
+    {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    /* Pause the domain for ring page setup */
+    rc1 = xc_domain_pause(xch, domain_id);
+    if ( rc1 != 0 )
+    {
+        PERROR("Unable to pause domain\n");
+        return NULL;
+    }
+
+    /* Get the pfn of the ring page */
+    rc1 = xc_hvm_param_get(xch, domain_id, param, &pfn);
+    if ( rc1 != 0 )
+    {
+        PERROR("Failed to get pfn of ring page\n");
+        goto out;
+    }
+
+    ring_pfn = pfn;
+    mmap_pfn = pfn;
+    ring_page = xc_map_foreign_batch(xch, domain_id, PROT_READ | PROT_WRITE,
+                                     &mmap_pfn, 1);
+    if ( mmap_pfn & XEN_DOMCTL_PFINFO_XTAB )
+    {
+        /* Map failed, populate ring page */
+        rc1 = xc_domain_populate_physmap_exact(xch, domain_id, 1, 0, 0,
+                                              &ring_pfn);
+        if ( rc1 != 0 )
+        {
+            PERROR("Failed to populate ring pfn\n");
+            goto out;
+        }
+
+        mmap_pfn = ring_pfn;
+        ring_page = xc_map_foreign_batch(xch, domain_id, PROT_READ | PROT_WRITE,
+                                         &mmap_pfn, 1);
+        if ( mmap_pfn & XEN_DOMCTL_PFINFO_XTAB )
+        {
+            PERROR("Could not map the ring page\n");
+            goto out;
+        }
+    }
+
+    switch ( param )
+    {
+    case HVM_PARAM_PAGING_RING_PFN:
+        op = XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE;
+        mode = XEN_DOMCTL_VM_EVENT_OP_PAGING;
+        break;
+
+    case HVM_PARAM_MONITOR_RING_PFN:
+        if ( enable_introspection )
+            op = XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION;
+        else
+            op = XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE;
+        mode = XEN_DOMCTL_VM_EVENT_OP_MONITOR;
+        break;
+
+    case HVM_PARAM_SHARING_RING_PFN:
+        op = XEN_DOMCTL_VM_EVENT_OP_SHARING_ENABLE;
+        mode = XEN_DOMCTL_VM_EVENT_OP_SHARING;
+        break;
+
+    /*
+     * This is for the outside chance that the HVM_PARAM is valid but is
+     * not usable as a vm_event ring.
+     */
+    default:
+        errno = EINVAL;
+        rc1 = -1;
+        goto out;
+    }
+
+    rc1 = xc_vm_event_control(xch, domain_id, op, mode, port);
+    if ( rc1 != 0 )
+    {
+        PERROR("Failed to enable vm_event\n");
+        goto out;
+    }
+
+    /* Remove the ring_pfn from the guest's physmap */
+    rc1 = xc_domain_decrease_reservation_exact(xch, domain_id, 1, 0, &ring_pfn);
+    if ( rc1 != 0 )
+        PERROR("Failed to remove ring page from guest physmap");
+
+ out:
+    saved_errno = errno;
+
+    rc2 = xc_domain_unpause(xch, domain_id);
+    if ( rc1 != 0 || rc2 != 0 )
+    {
+        if ( rc2 != 0 )
+        {
+            if ( rc1 == 0 )
+                saved_errno = errno;
+            PERROR("Unable to unpause domain");
+        }
+
+        if ( ring_page )
+            munmap(ring_page, XC_PAGE_SIZE);
+        ring_page = NULL;
+
+        errno = saved_errno;
+    }
+
+    return ring_page;
+}
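
For symmetry, a hedged sketch of the teardown a caller would pair with the
enable path above (the *_DISABLE op/mode values are introduced in the domctl
changes below; the exact call site is hypothetical):

    /* Disable the monitor ring, then drop our mapping of the ring page. */
    xc_vm_event_control(xch, domain_id,
                        XEN_DOMCTL_VM_EVENT_OP_MONITOR_DISABLE,
                        XEN_DOMCTL_VM_EVENT_OP_MONITOR, NULL);
    munmap(ring_page, XC_PAGE_SIZE);
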
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 1956091..0db6967 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -55,6 +55,7 @@ obj-y += lzo.o
 obj-$(HAS_PDX) += pdx.o
 obj-$(HAS_MEM_ACCESS) += mem_access.o
 obj-$(HAS_MEM_ACCESS) += mem_event.o
+obj-$(HAS_MEM_ACCESS) += vm_event.o
 
 obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo unlz4 earlycpio,$(n).init.o)
 
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
new file mode 100644
index 0000000..0db899e
--- /dev/null
+++ b/xen/common/vm_event.c
@@ -0,0 +1,739 @@
+/******************************************************************************
+ * vm_event.c
+ *
+ * VM event support.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/wait.h>
+#include <xen/vm_event.h>
+#include <xen/mem_access.h>
+#include <asm/p2m.h>
+
+#ifdef HAS_MEM_PAGING
+#include <asm/mem_paging.h>
+#endif
+
+#ifdef HAS_MEM_SHARING
+#include <asm/mem_sharing.h>
+#endif
+
+#include <xsm/xsm.h>
+
+/* for public/io/ring.h macros */
+#define xen_mb()   mb()
+#define xen_rmb()  rmb()
+#define xen_wmb()  wmb()
+
+#define vm_event_ring_lock_init(_ved)  spin_lock_init(&(_ved)->ring_lock)
+#define vm_event_ring_lock(_ved)       spin_lock(&(_ved)->ring_lock)
+#define vm_event_ring_unlock(_ved)     spin_unlock(&(_ved)->ring_lock)
+
+static int vm_event_enable(
+    struct domain *d,
+    xen_domctl_vm_event_op_t *vec,
+    struct vm_event_domain *ved,
+    int pause_flag,
+    int param,
+    xen_event_channel_notification_t notification_fn)
+{
+    int rc;
+    unsigned long ring_gfn = d->arch.hvm_domain.params[param];
+
+    /*
+     * Only one helper at a time. If the helper crashed, the ring is in an
+     * undefined state and so is the guest.
+     */
+    if ( ved->ring_page )
+        return -EBUSY;
+
+    /* The parameter defaults to zero, and it should be set to something. */
+    if ( ring_gfn == 0 )
+        return -ENOSYS;
+
+    vm_event_ring_lock_init(ved);
+    vm_event_ring_lock(ved);
+
+    rc = prepare_ring_for_helper(d, ring_gfn, &ved->ring_pg_struct,
+                                    &ved->ring_page);
+    if ( rc < 0 )
+        goto err;
+
+    /* Set the number of currently blocked vCPUs to 0. */
+    ved->blocked = 0;
+
+    /* Allocate event channel */
+    rc = alloc_unbound_xen_event_channel(d->vcpu[0],
+                                         current->domain->domain_id,
+                                         notification_fn);
+    if ( rc < 0 )
+        goto err;
+
+    ved->xen_port = vec->port = rc;
+
+    /* Prepare ring buffer */
+    FRONT_RING_INIT(&ved->front_ring,
+                    (vm_event_sring_t *)ved->ring_page,
+                    PAGE_SIZE);
+
+    /* Save the pause flag for this particular ring. */
+    ved->pause_flag = pause_flag;
+
+    /* Initialize the last-chance wait queue. */
+    init_waitqueue_head(&ved->wq);
+
+    vm_event_ring_unlock(ved);
+    return 0;
+
+ err:
+    destroy_ring_for_helper(&ved->ring_page,
+                            ved->ring_pg_struct);
+    vm_event_ring_unlock(ved);
+
+    return rc;
+}
+
+static unsigned int vm_event_ring_available(struct vm_event_domain *ved)
+{
+    int avail_req = RING_FREE_REQUESTS(&ved->front_ring);
+    avail_req -= ved->target_producers;
+    avail_req -= ved->foreign_producers;
+
+    BUG_ON(avail_req < 0);
+
+    return avail_req;
+}
+
+/*
+ * vm_event_wake_blocked() will wake up vCPUs waiting for room in the
+ * ring. These vCPUs were paused on their way out after placing an event,
+ * but need to be resumed once the ring can process at least one event
+ * from them.
+ */
+static void vm_event_wake_blocked(struct domain *d, struct vm_event_domain *ved)
+{
+    struct vcpu *v;
+    int online = d->max_vcpus;
+    unsigned int avail_req = vm_event_ring_available(ved);
+
+    if ( avail_req == 0 || ved->blocked == 0 )
+        return;
+
+    /*
+     * We ensure that we only have vCPUs online if there are enough free slots
+     * for their memory events to be processed.  This will ensure that no
+     * memory events are lost (due to the fact that certain types of events
+     * cannot be replayed, we need to ensure that there is space in the ring
+     * for when they are hit).
+     * See comment below in vm_event_put_request().
+     */
+    for_each_vcpu ( d, v )
+        if ( test_bit(ved->pause_flag, &v->pause_flags) )
+            online--;
+
+    ASSERT(online == (d->max_vcpus - ved->blocked));
+
+    /*
+     * We remember which vcpu last woke up to avoid always scanning linearly
+     * from zero and starving higher-numbered vcpus under high load.
+     */
+    if ( d->vcpu )
+    {
+        int i, j, k;
+
+        for ( i = ved->last_vcpu_wake_up + 1, j = 0; j < d->max_vcpus; i++, j++ )
+        {
+            k = i % d->max_vcpus;
+            v = d->vcpu[k];
+            if ( !v )
+                continue;
+
+            if ( !(ved->blocked) || online >= avail_req )
+               break;
+
+            if ( test_and_clear_bit(ved->pause_flag, &v->pause_flags) )
+            {
+                vcpu_unpause(v);
+                online++;
+                ved->blocked--;
+                ved->last_vcpu_wake_up = k;
+            }
+        }
+    }
+}
+
+/*
+ * In the event that a vCPU attempted to place an event in the ring and
+ * was unable to do so, it is queued on a wait queue.  These are woken as
+ * needed, and take precedence over the blocked vCPUs.
+ */
+static void vm_event_wake_queued(struct domain *d, struct vm_event_domain *ved)
+{
+    unsigned int avail_req = vm_event_ring_available(ved);
+
+    if ( avail_req > 0 )
+        wake_up_nr(&ved->wq, avail_req);
+}
+
+/*
+ * vm_event_wake() will wake up all vCPUs waiting for the ring to
+ * become available.  If we have queued vCPUs, they get top priority. We
+ * are guaranteed that they will go through code paths that will eventually
+ * call vm_event_wake() again, ensuring that any blocked vCPUs will get
+ * unpaused once all the queued vCPUs have made it through.
+ */
+void vm_event_wake(struct domain *d, struct vm_event_domain *ved)
+{
+    if ( !list_empty(&ved->wq.list) )
+        vm_event_wake_queued(d, ved);
+    else
+        vm_event_wake_blocked(d, ved);
+}
+
+static int vm_event_disable(struct domain *d, struct vm_event_domain *ved)
+{
+    if ( ved->ring_page )
+    {
+        struct vcpu *v;
+
+        vm_event_ring_lock(ved);
+
+        if ( !list_empty(&ved->wq.list) )
+        {
+            vm_event_ring_unlock(ved);
+            return -EBUSY;
+        }
+
+        /* Free domU's event channel and leave the other one unbound */
+        free_xen_event_channel(d->vcpu[0], ved->xen_port);
+
+        /* Unblock all vCPUs */
+        for_each_vcpu ( d, v )
+        {
+            if ( test_and_clear_bit(ved->pause_flag, &v->pause_flags) )
+            {
+                vcpu_unpause(v);
+                ved->blocked--;
+            }
+        }
+
+        destroy_ring_for_helper(&ved->ring_page,
+                                ved->ring_pg_struct);
+        vm_event_ring_unlock(ved);
+    }
+
+    return 0;
+}
+
+static inline void vm_event_release_slot(struct domain *d,
+                                          struct vm_event_domain *ved)
+{
+    /* Update the accounting */
+    if ( current->domain == d )
+        ved->target_producers--;
+    else
+        ved->foreign_producers--;
+
+    /* Kick any waiters */
+    vm_event_wake(d, ved);
+}
+
+/*
+ * vm_event_mark_and_pause() tags a vcpu and puts it to sleep.
+ * The vcpu will resume execution in vm_event_wake_blocked().
+ */
+void vm_event_mark_and_pause(struct vcpu *v, struct vm_event_domain *ved)
+{
+    if ( !test_and_set_bit(ved->pause_flag, &v->pause_flags) )
+    {
+        vcpu_pause_nosync(v);
+        ved->blocked++;
+    }
+}
+
+/*
+ * This must be preceded by a call to claim_slot(), and is guaranteed to
+ * succeed.  As a side-effect however, the vCPU may be paused if the ring is
+ * overly full and its continued execution would cause stalling and excessive
+ * waiting.  The vCPU will be automatically unpaused when the ring clears.
+ */
+void vm_event_put_request(struct domain *d,
+                           struct vm_event_domain *ved,
+                           vm_event_request_t *req)
+{
+    vm_event_front_ring_t *front_ring;
+    int free_req;
+    unsigned int avail_req;
+    RING_IDX req_prod;
+
+    if ( current->domain != d )
+    {
+        req->flags |= VM_EVENT_FLAG_FOREIGN;
+#ifndef NDEBUG
+        if ( !(req->flags & VM_EVENT_FLAG_VCPU_PAUSED) )
+            gdprintk(XENLOG_G_WARNING, "d%dv%d was not paused.\n",
+                     d->domain_id, req->vcpu_id);
+#endif
+    }
+
+    vm_event_ring_lock(ved);
+
+    /* Due to the reservations, this step must succeed. */
+    front_ring = &ved->front_ring;
+    free_req = RING_FREE_REQUESTS(front_ring);
+    ASSERT(free_req > 0);
+
+    /* Copy request */
+    req_prod = front_ring->req_prod_pvt;
+    memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
+    req_prod++;
+
+    /* Update ring */
+    front_ring->req_prod_pvt = req_prod;
+    RING_PUSH_REQUESTS(front_ring);
+
+    /* We've actually *used* our reservation, so release the slot. */
+    vm_event_release_slot(d, ved);
+
+    /*
+     * Give this vCPU a black eye if necessary, on the way out. See the
+     * comments above vm_event_wake_blocked() for more information on how
+     * this mechanism works to avoid waiting.
+     */
+    avail_req = vm_event_ring_available(ved);
+    if ( current->domain == d && avail_req < d->max_vcpus )
+        vm_event_mark_and_pause(current, ved);
+
+    vm_event_ring_unlock(ved);
+
+    notify_via_xen_event_channel(d, ved->xen_port);
+}
+
+int vm_event_get_response(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_response_t *rsp)
+{
+    vm_event_front_ring_t *front_ring;
+    RING_IDX rsp_cons;
+
+    vm_event_ring_lock(ved);
+
+    front_ring = &ved->front_ring;
+    rsp_cons = front_ring->rsp_cons;
+
+    if ( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) )
+    {
+        vm_event_ring_unlock(ved);
+        return 0;
+    }
+
+    /* Copy response */
+    memcpy(rsp, RING_GET_RESPONSE(front_ring, rsp_cons), sizeof(*rsp));
+    rsp_cons++;
+
+    /* Update ring */
+    front_ring->rsp_cons = rsp_cons;
+    front_ring->sring->rsp_event = rsp_cons + 1;
+
+    /*
+     * Kick any waiters -- since we've just consumed an event, there may be
+     * additional space available in the ring.
+     */
+    vm_event_wake(d, ved);
+
+    vm_event_ring_unlock(ved);
+
+    return 1;
+}
+
+void vm_event_cancel_slot(struct domain *d, struct vm_event_domain *ved)
+{
+    vm_event_ring_lock(ved);
+    vm_event_release_slot(d, ved);
+    vm_event_ring_unlock(ved);
+}
+
+static int vm_event_grab_slot(struct vm_event_domain *ved, int foreign)
+{
+    unsigned int avail_req;
+
+    if ( !ved->ring_page )
+        return -ENOSYS;
+
+    vm_event_ring_lock(ved);
+
+    avail_req = vm_event_ring_available(ved);
+    if ( avail_req == 0 )
+    {
+        vm_event_ring_unlock(ved);
+        return -EBUSY;
+    }
+
+    if ( !foreign )
+        ved->target_producers++;
+    else
+        ved->foreign_producers++;
+
+    vm_event_ring_unlock(ved);
+
+    return 0;
+}
+
+/* Simple try_grab wrapper for use in the wait_event() macro. */
+static int vm_event_wait_try_grab(struct vm_event_domain *ved, int *rc)
+{
+    *rc = vm_event_grab_slot(ved, 0);
+    return *rc;
+}
+
+/*
+ * Call vm_event_grab_slot() until the ring is gone or a slot becomes
+ * available.
+ */
+static int vm_event_wait_slot(struct vm_event_domain *ved)
+{
+    int rc = -EBUSY;
+    wait_event(ved->wq, vm_event_wait_try_grab(ved, &rc) != -EBUSY);
+    return rc;
+}
+
+bool_t vm_event_check_ring(struct vm_event_domain *ved)
+{
+    return (ved->ring_page != NULL);
+}
+
+/*
+ * Determines whether or not the current vCPU belongs to the target domain,
+ * and calls the appropriate wait function.  If it is a guest vCPU, then we
+ * use vm_event_wait_slot() to reserve a slot.  As long as there is a ring,
+ * this function will always return 0 for a guest.  For a non-guest, we check
+ * for space and return -EBUSY if the ring is not available.
+ *
+ * Return codes: -ENOSYS: the ring is not yet configured
+ *               -EBUSY: the ring is busy
+ *               0: a spot has been reserved
+ */
+int __vm_event_claim_slot(struct domain *d, struct vm_event_domain *ved,
+                            bool_t allow_sleep)
+{
+    if ( (current->domain == d) && allow_sleep )
+        return vm_event_wait_slot(ved);
+    else
+        return vm_event_grab_slot(ved, (current->domain != d));
+}
+
+#ifdef HAS_MEM_PAGING
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_paging_notification(struct vcpu *v, unsigned int port)
+{
+    if ( likely(v->domain->vm_event->paging.ring_page != NULL) )
+        p2m_mem_paging_resume(v->domain);
+}
+#endif
+
+#ifdef HAS_MEM_ACCESS
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_access_notification(struct vcpu *v, unsigned int port)
+{
+    if ( likely(v->domain->vm_event->monitor.ring_page != NULL) )
+        mem_access_resume(v->domain);
+}
+#endif
+
+#ifdef HAS_MEM_SHARING
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_sharing_notification(struct vcpu *v, unsigned int port)
+{
+    if ( likely(v->domain->vm_event->share.ring_page != NULL) )
+        mem_sharing_sharing_resume(v->domain);
+}
+#endif
+
+int do_vm_event_op(int op, uint32_t domain, void *arg)
+{
+    int ret;
+    struct domain *d;
+
+    ret = rcu_lock_live_remote_domain_by_id(domain, &d);
+    if ( ret )
+        return ret;
+
+    ret = xsm_vm_event_op(XSM_DM_PRIV, d, op);
+    if ( ret )
+        goto out;
+
+    switch ( op )
+    {
+#ifdef HAS_MEM_PAGING
+    case XENMEM_paging_op:
+        ret = mem_paging_memop(d, arg);
+        break;
+#endif
+#ifdef HAS_MEM_SHARING
+    case XENMEM_sharing_op:
+        ret = mem_sharing_memop(d, arg);
+        break;
+#endif
+    default:
+        ret = -ENOSYS;
+    }
+
+ out:
+    rcu_unlock_domain(d);
+    return ret;
+}
+
+/* Clean up on domain destruction */
+void vm_event_cleanup(struct domain *d)
+{
+#ifdef HAS_MEM_PAGING
+    if ( d->vm_event->paging.ring_page )
+    {
+        /*
+         * Destroying the wait queue head means waking up all queued vcpus.
+         * This will drain the list, allowing the disable routine to
+         * complete. It will also drop all domain refs the wait-queued vcpus
+         * are holding. Finally, because this code path involves previously
+         * pausing the domain (domain_kill), unpausing the vcpus causes no
+         * harm.
+         */
+        destroy_waitqueue_head(&d->vm_event->paging.wq);
+        (void)vm_event_disable(d, &d->vm_event->paging);
+    }
+#endif
+#ifdef HAS_MEM_ACCESS
+    if ( d->vm_event->monitor.ring_page )
+    {
+        destroy_waitqueue_head(&d->vm_event->monitor.wq);
+        (void)vm_event_disable(d, &d->vm_event->monitor);
+    }
+#endif
+#ifdef HAS_MEM_SHARING
+    if ( d->vm_event->share.ring_page )
+    {
+        destroy_waitqueue_head(&d->vm_event->share.wq);
+        (void)vm_event_disable(d, &d->vm_event->share);
+    }
+#endif
+}
+
+int vm_event_domctl(struct domain *d, xen_domctl_vm_event_op_t *vec,
+                     XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+{
+    int rc;
+
+    rc = xsm_vm_event_control(XSM_PRIV, d, vec->mode, vec->op);
+    if ( rc )
+        return rc;
+
+    if ( unlikely(d == current->domain) )
+    {
+        gdprintk(XENLOG_INFO, "Tried to do a memory event op on itself.\n");
+        return -EINVAL;
+    }
+
+    if ( unlikely(d->is_dying) )
+    {
+        gdprintk(XENLOG_INFO, "Ignoring memory event op on dying domain %u\n",
+                 d->domain_id);
+        return 0;
+    }
+
+    if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
+    {
+        gdprintk(XENLOG_INFO,
+                 "Memory event op on a domain (%u) with no vcpus\n",
+                 d->domain_id);
+        return -EINVAL;
+    }
+
+    rc = -ENOSYS;
+
+    switch ( vec->mode )
+    {
+#ifdef HAS_MEM_PAGING
+    case XEN_DOMCTL_VM_EVENT_OP_PAGING:
+    {
+        struct vm_event_domain *ved = &d->vm_event->paging;
+        rc = -EINVAL;
+
+        switch ( vec->op )
+        {
+        case XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE:
+        {
+            struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
+            rc = -ENODEV;
+            /* Only HAP is supported */
+            if ( !hap_enabled(d) )
+                break;
+
+            rc = -EMLINK;
+            /* No paging if iommu is used */
+            if ( unlikely(need_iommu(d)) )
+                break;
+
+            rc = -EXDEV;
+            /* Disallow paging in a PoD guest */
+            if ( p2m->pod.entry_count )
+                break;
+
+            rc = vm_event_enable(d, vec, ved, _VPF_mem_paging,
+                                 HVM_PARAM_PAGING_RING_PFN,
+                                 mem_paging_notification);
+        }
+        break;
+
+        case XEN_DOMCTL_VM_EVENT_OP_PAGING_DISABLE:
+        {
+            if ( ved->ring_page )
+                rc = vm_event_disable(d, ved);
+        }
+        break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+    }
+    break;
+#endif
+
+#ifdef HAS_MEM_ACCESS
+    case XEN_DOMCTL_VM_EVENT_OP_MONITOR:
+    {
+        struct vm_event_domain *ved = &d->vm_event->monitor;
+        rc = -EINVAL;
+
+        switch ( vec->op )
+        {
+        case XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE:
+        case XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION:
+        {
+            rc = vm_event_enable(d, vec, ved, _VPF_mem_access,
+                                 HVM_PARAM_MONITOR_RING_PFN,
+                                 mem_access_notification);
+
+            if ( vec->op == XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION
+                 && !rc )
+                p2m_setup_introspection(d);
+        }
+        break;
+
+        case XEN_DOMCTL_VM_EVENT_OP_MONITOR_DISABLE:
+        {
+            if ( ved->ring_page )
+            {
+                rc = vm_event_disable(d, ved);
+                d->arch.hvm_domain.introspection_enabled = 0;
+            }
+        }
+        break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+    }
+    break;
+#endif
+
+#ifdef HAS_MEM_SHARING
+    case XEN_DOMCTL_VM_EVENT_OP_SHARING:
+    {
+        struct vm_event_domain *ved = &d->vm_event->share;
+        rc = -EINVAL;
+
+        switch ( vec->op )
+        {
+        case XEN_DOMCTL_VM_EVENT_OP_SHARING_ENABLE:
+        {
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
+            rc = -ENODEV;
+            /* Only HAP is supported */
+            if ( !hap_enabled(d) )
+                break;
+
+            rc = vm_event_enable(d, vec, ved, _VPF_mem_sharing,
+                                 HVM_PARAM_SHARING_RING_PFN,
+                                 mem_sharing_notification);
+        }
+        break;
+
+        case XEN_DOMCTL_VM_EVENT_OP_SHARING_DISABLE:
+        {
+            if ( ved->ring_page )
+                rc = vm_event_disable(d, ved);
+        }
+        break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+    }
+    break;
+#endif
+
+    default:
+        rc = -ENOSYS;
+    }
+
+    return rc;
+}
+
+void vm_event_vcpu_pause(struct vcpu *v)
+{
+    ASSERT(v == current);
+
+    atomic_inc(&v->vm_event_pause_count);
+    vcpu_pause_nosync(v);
+}
+
+void vm_event_vcpu_unpause(struct vcpu *v)
+{
+    int old, new, prev = v->vm_event_pause_count.counter;
+
+    /*
+     * All unpause requests come as a result of toolstack responses.
+     * Prevent underflow of the vcpu pause count.
+     */
+    do
+    {
+        old = prev;
+        new = old - 1;
+
+        if ( new < 0 )
+        {
+            printk(XENLOG_G_WARNING
+                   "%pv vm_event: Too many unpause attempts\n", v);
+            return;
+        }
+
+        prev = cmpxchg(&v->vm_event_pause_count.counter, old, new);
+    } while ( prev != old );
+
+    vcpu_unpause(v);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
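
As a consumer of the above within Xen, a ring notification handler is
expected to drain responses roughly as follows. This is a sketch modelled on
how mem_access_resume() behaves today, not code from this patch:

    vm_event_response_t rsp;

    /* Pull all queued responses and unpause the vCPUs they refer to. */
    while ( vm_event_get_response(d, &d->vm_event->monitor, &rsp) )
    {
        struct vcpu *v;

        if ( rsp.vcpu_id >= d->max_vcpus )
            continue;
        v = d->vcpu[rsp.vcpu_id];

        if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED )
            vm_event_vcpu_unpause(v);
    }
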
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 596a624..a7d3e94 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -835,6 +835,84 @@ typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t);
 
 /*
+ * VM event operations
+ */
+
+/* XEN_DOMCTL_vm_event_op */
+
+/*
+ * Domain memory paging
+ * Page memory in and out.
+ * Domctl interface to set up and tear down the
+ * pager<->hypervisor interface. Use XENMEM_paging_op*
+ * to perform per-page operations.
+ *
+ * The XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE domctl returns several
+ * non-standard error codes to indicate why paging could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EMLINK - guest has iommu passthrough enabled
+ * EXDEV  - guest has PoD enabled
+ * EBUSY  - guest has or had paging enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_VM_EVENT_OP_PAGING            1
+
+#define XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE     0
+#define XEN_DOMCTL_VM_EVENT_OP_PAGING_DISABLE    1
+
+/*
+ * Monitor permissions.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * There are HVM hypercalls to set the per-page access permissions of every
+ * page in a domain.  When one of these permissions--independent, read,
+ * write, and execute--is violated, the VCPU is paused and a memory event
+ * is sent with what happened. (See public/vm_event.h.)
+ *
+ * The memory event handler can then resume the VCPU and redo the access
+ * with a XENMEM_access_op_resume hypercall.
+ *
+ * The XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE domctl returns several
+ * non-standard error codes to indicate why access could not be enabled:
+ * EBUSY  - guest has or had access enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR                        2
+
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE                 0
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR_DISABLE                1
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION   2
+
+/*
+ * Sharing ENOMEM helper.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * If set up, this ring is used to communicate failed allocations
+ * in the unshare path. XENMEM_sharing_op_resume is used to wake up
+ * vcpus that could not unshare.
+ *
+ * Note that sharing can be turned on (as per the domctl below)
+ * *without* this ring being set up.
+ */
+#define XEN_DOMCTL_VM_EVENT_OP_SHARING           3
+
+#define XEN_DOMCTL_VM_EVENT_OP_SHARING_ENABLE    0
+#define XEN_DOMCTL_VM_EVENT_OP_SHARING_DISABLE   1
+
+/*
+ * Use for teardown/setup of helper<->hypervisor interface for paging,
+ * access and sharing.
+ */
+struct xen_domctl_vm_event_op {
+    uint32_t       op;           /* XEN_DOMCTL_VM_EVENT_OP_*_* */
+    uint32_t       mode;         /* XEN_DOMCTL_VM_EVENT_OP_* */
+
+    uint32_t       port;         /* OUT: event channel for ring */
+};
+typedef struct xen_domctl_vm_event_op xen_domctl_vm_event_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vm_event_op_t);
+
+/*
  * Memory sharing operations
  */
 /* XEN_DOMCTL_mem_sharing_op.
@@ -1123,6 +1201,7 @@ struct xen_domctl {
         struct xen_domctl_set_target        set_target;
         struct xen_domctl_subscribe         subscribe;
         struct xen_domctl_debug_op          debug_op;
+        struct xen_domctl_vm_event_op       vm_event_op;
         struct xen_domctl_mem_event_op      mem_event_op;
         struct xen_domctl_mem_sharing_op    mem_sharing_op;
 #if defined(__i386__) || defined(__x86_64__)
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
new file mode 100644
index 0000000..8fba3d1b
--- /dev/null
+++ b/xen/include/public/vm_event.h
@@ -0,0 +1,193 @@
+/******************************************************************************
+ * vm_event.h
+ *
+ * VM event common structures.
+ *
+ * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_PUBLIC_VM_EVENT_H
+#define _XEN_PUBLIC_VM_EVENT_H
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#include "xen.h"
+#include "io/ring.h"
+
+#define VM_EVENT_INTERFACE_VERSION 0x00000001
+
+/* Memory event flags */
+#define VM_EVENT_FLAG_VCPU_PAUSED     (1 << 0)
+#define VM_EVENT_FLAG_DROP_PAGE       (1 << 1)
+#define VM_EVENT_FLAG_EVICT_FAIL      (1 << 2)
+#define VM_EVENT_FLAG_FOREIGN         (1 << 3)
+#define VM_EVENT_FLAG_DUMMY           (1 << 4)
+/*
+ * Emulate the fault-causing instruction (if set in the event response flags).
+ * This will allow the guest to continue execution without lifting the page
+ * access restrictions.
+ */
+#define VM_EVENT_FLAG_EMULATE         (1 << 5)
+/*
+ * Same as VM_EVENT_FLAG_EMULATE, but with write operations or operations
+ * potentially having side effects (like memory mapped or port I/O) disabled.
+ */
+#define VM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6)
+
+/* Reasons for the vm event request */
+/* Default case */
+#define VM_EVENT_REASON_UNKNOWN                 0
+/* Memory access violation */
+#define VM_EVENT_REASON_MEM_ACCESS              1
+/* Memory sharing event */
+#define VM_EVENT_REASON_MEM_SHARING             2
+/* Memory paging event */
+#define VM_EVENT_REASON_MEM_PAGING              3
+/* CR0 was updated */
+#define VM_EVENT_REASON_MOV_TO_CR0              4
+/* CR3 was updated */
+#define VM_EVENT_REASON_MOV_TO_CR3              5
+/* CR4 was updated */
+#define VM_EVENT_REASON_MOV_TO_CR4              6
+/* Debug operation executed (int3) */
+#define VM_EVENT_REASON_SOFTWARE_BREAKPOINT     7
+/* Single-step (MTF) */
+#define VM_EVENT_REASON_SINGLESTEP              8
+/* An MSR was updated. Does NOT honour HVMPME_onchangeonly */
+#define VM_EVENT_REASON_MOV_TO_MSR              9
+
+/* Using a custom struct (not hvm_hw_cpu) so as to not fill
+ * the vm_event ring buffer too quickly. */
+struct vm_event_regs_x86 {
+    uint64_t rax;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbx;
+    uint64_t rsp;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+    uint64_t rflags;
+    uint64_t dr7;
+    uint64_t rip;
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    uint64_t msr_efer;
+    uint64_t msr_star;
+    uint64_t msr_lstar;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint32_t cs_arbytes;
+    uint32_t _pad;
+};
+
+struct vm_event_mem_access_data {
+    uint64_t gfn;
+    uint64_t offset;
+    uint64_t gla; /* if gla_valid */
+    uint8_t access_r;
+    uint8_t access_w;
+    uint8_t access_x;
+    uint8_t gla_valid;
+    uint8_t fault_with_gla;
+    uint8_t fault_in_gpt;
+    uint16_t _pad;
+};
+
+struct vm_event_mov_to_cr_data {
+    uint64_t new_value;
+    uint64_t old_value;
+};
+
+struct vm_event_software_breakpoint_data {
+    uint64_t gfn;
+};
+
+struct vm_event_singlestep_data {
+    uint64_t gfn;
+};
+
+struct vm_event_mov_to_msr_data {
+    uint64_t msr;
+    uint64_t value;
+};
+
+struct vm_event_paging_data {
+    uint64_t gfn;
+    uint32_t p2mt;
+    uint32_t _pad;
+};
+
+struct vm_event_sharing_data {
+    uint64_t gfn;
+    uint32_t p2mt;
+    uint32_t _pad;
+};
+
+typedef struct vm_event_st {
+    uint32_t version; /* VM_EVENT_INTERFACE_VERSION */
+    uint32_t flags;
+    uint32_t vcpu_id;
+    uint32_t reason; /* VM_EVENT_REASON_* */
+
+    union {
+        struct vm_event_paging_data                mem_paging;
+        struct vm_event_sharing_data               mem_sharing;
+        struct vm_event_mem_access_data            mem_access;
+        struct vm_event_mov_to_cr_data             mov_to_cr;
+        struct vm_event_mov_to_msr_data            mov_to_msr;
+        struct vm_event_software_breakpoint_data   software_breakpoint;
+        struct vm_event_singlestep_data            singlestep;
+    } data;
+
+    union {
+        struct vm_event_regs_x86 x86;
+    } regs;
+} vm_event_request_t, vm_event_response_t;
+
+DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+#endif /* _XEN_PUBLIC_VM_EVENT_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
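
On the helper side, the DEFINE_RING_TYPES() invocation above also generates
the back ring used by the toolstack. Consumption follows the standard
io/ring.h pattern; a hedged sketch, with ring_page as returned by
xc_vm_event_enable():

    vm_event_back_ring_t back_ring;

    SHARED_RING_INIT((vm_event_sring_t *)ring_page);
    BACK_RING_INIT(&back_ring, (vm_event_sring_t *)ring_page, XC_PAGE_SIZE);

    while ( RING_HAS_UNCONSUMED_REQUESTS(&back_ring) )
    {
        vm_event_request_t req;
        RING_IDX cons = back_ring.req_cons;

        memcpy(&req, RING_GET_REQUEST(&back_ring, cons), sizeof(req));
        back_ring.req_cons = ++cons;
        back_ring.sring->req_event = cons + 1;

        /* ... handle req, then queue a vm_event_response_t ... */
    }
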
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 64a2bd3..681efa9 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -24,6 +24,7 @@
 #include <public/sysctl.h>
 #include <public/vcpu.h>
 #include <public/mem_event.h>
+#include <public/vm_event.h>
 #include <public/event_channel.h>
 
 #ifdef CONFIG_COMPAT
@@ -216,6 +217,8 @@ struct vcpu
 
     /* VCPU paused for mem_event replies. */
     atomic_t         mem_event_pause_count;
+    /* VCPU paused for vm_event replies. */
+    atomic_t         vm_event_pause_count;
     /* VCPU paused by system controller. */
     int              controller_pause_count;
 
@@ -292,6 +295,41 @@ struct mem_event_per_domain
     struct mem_event_domain monitor;
 };
 
+/* VM event */
+struct vm_event_domain
+{
+    /* ring lock */
+    spinlock_t ring_lock;
+    /* The ring has 64 entries; unsigned char suffices for these counts */
+    unsigned char foreign_producers;
+    unsigned char target_producers;
+    /* shared ring page */
+    void *ring_page;
+    struct page_info *ring_pg_struct;
+    /* front-end ring */
+    vm_event_front_ring_t front_ring;
+    /* event channel port (vcpu0 only) */
+    int xen_port;
+    /* vm_event bit for vcpu->pause_flags */
+    int pause_flag;
+    /* list of vcpus waiting for room in the ring */
+    struct waitqueue_head wq;
+    /* the number of vCPUs blocked */
+    unsigned int blocked;
+    /* The last vcpu woken up */
+    unsigned int last_vcpu_wake_up;
+};
+
+struct vm_event_per_domain
+{
+    /* Memory sharing support */
+    struct vm_event_domain share;
+    /* Memory paging support */
+    struct vm_event_domain paging;
+    /* VM event monitor support */
+    struct vm_event_domain monitor;
+};
+
 struct evtchn_port_ops;
 
 /*
@@ -445,6 +483,9 @@ struct domain
     /* Various mem_events */
     struct mem_event_per_domain *mem_event;
 
+    /* Various vm_events */
+    struct vm_event_per_domain *vm_event;
+
     /*
      * Can be specified by the user. If that is not the case, it is
      * computed from the union of all the vcpu cpu-affinity masks.
diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
new file mode 100644
index 0000000..988ea42
--- /dev/null
+++ b/xen/include/xen/vm_event.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ * vm_event.h
+ *
+ * Common interface for vm event support.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __VM_EVENT_H__
+#define __VM_EVENT_H__
+
+#include <xen/sched.h>
+
+#ifdef HAS_MEM_ACCESS
+
+/* Clean up on domain destruction */
+void vm_event_cleanup(struct domain *d);
+
+/* Returns whether a ring has been set up */
+bool_t vm_event_check_ring(struct vm_event_domain *ved);
+
+/*
+ * Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is no
+ * available space and the caller is a foreign domain. If the guest itself
+ * is the caller, -EBUSY is avoided by sleeping on a wait queue to ensure
+ * that the ring does not lose future events.
+ *
+ * However, the allow_sleep flag can be set to false in cases in which it is
+ * ok to lose future events, and thus -EBUSY can be returned to guest vcpus
+ * (handle with care!).
+ *
+ * In general, you must follow a claim_slot() call with either put_request()
+ * or cancel_slot(), both of which are guaranteed to succeed.
+ */
+int __vm_event_claim_slot(struct domain *d, struct vm_event_domain *ved,
+                          bool_t allow_sleep);
+static inline int vm_event_claim_slot(struct domain *d,
+                                      struct vm_event_domain *ved)
+{
+    return __vm_event_claim_slot(d, ved, 1);
+}
+
+static inline int vm_event_claim_slot_nosleep(struct domain *d,
+                                              struct vm_event_domain *ved)
+{
+    return __vm_event_claim_slot(d, ved, 0);
+}
+
+void vm_event_cancel_slot(struct domain *d, struct vm_event_domain *ved);
+
+void vm_event_put_request(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_request_t *req);
+
+int vm_event_get_response(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_response_t *rsp);
+
+int do_vm_event_op(int op, uint32_t domain, void *arg);
+int vm_event_domctl(struct domain *d, xen_domctl_vm_event_op_t *vec,
+                    XEN_GUEST_HANDLE_PARAM(void) u_domctl);
+
+void vm_event_vcpu_pause(struct vcpu *v);
+void vm_event_vcpu_unpause(struct vcpu *v);
+
+#else
+
+static inline void vm_event_cleanup(struct domain *d) {}
+
+static inline bool_t vm_event_check_ring(struct vm_event_domain *ved)
+{
+    return 0;
+}
+
+static inline int vm_event_claim_slot(struct domain *d,
+                                      struct vm_event_domain *ved)
+{
+    return -ENOSYS;
+}
+
+static inline int vm_event_claim_slot_nosleep(struct domain *d,
+                                              struct vm_event_domain *ved)
+{
+    return -ENOSYS;
+}
+
+static inline
+void vm_event_cancel_slot(struct domain *d, struct vm_event_domain *ved)
+{}
+
+static inline
+void vm_event_put_request(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_request_t *req)
+{}
+
+static inline
+int vm_event_get_response(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_response_t *rsp)
+{
+    return -ENOSYS;
+}
+
+static inline int do_vm_event_op(int op, uint32_t domain, void *arg)
+{
+    return -ENOSYS;
+}
+
+static inline
+int vm_event_domctl(struct domain *d, xen_domctl_vm_event_op_t *vec,
+                    XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+{
+    return -ENOSYS;
+}
+
+static inline void vm_event_vcpu_pause(struct vcpu *v) {}
+static inline void vm_event_vcpu_unpause(struct vcpu *v) {}
+
+#endif /* HAS_MEM_ACCESS */
+
+#endif /* __VM_EVENT_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
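
To make the claim_slot()/put_request() contract above concrete, an
in-hypervisor producer is expected to look roughly like the sketch below.
This is illustrative only -- the actual call sites are added by later
patches in the series, and v here is assumed to be the current vCPU:

    vm_event_request_t req = {
        .reason  = VM_EVENT_REASON_MEM_ACCESS,
        .vcpu_id = v->vcpu_id,
    };

    /* Reserve a slot first; a guest vcpu may sleep on the wait queue. */
    if ( vm_event_claim_slot(d, &d->vm_event->monitor) )
        return;

    /* Pause the vCPU until the toolstack responds, and flag it as such. */
    vm_event_vcpu_pause(v);
    req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;

    vm_event_put_request(d, &d->vm_event->monitor, &req);
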
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index f20e89c..d6d403a 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -525,6 +525,18 @@ static XSM_INLINE int xsm_mem_event_op(XSM_DEFAULT_ARG struct domain *d, int op)
     XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
+
+static XSM_INLINE int xsm_vm_event_control(XSM_DEFAULT_ARG struct domain *d, int mode, int op)
+{
+    XSM_ASSERT_ACTION(XSM_PRIV);
+    return xsm_default_action(action, current->domain, d);
+}
+
+static XSM_INLINE int xsm_vm_event_op(XSM_DEFAULT_ARG struct domain *d, int op)
+{
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
+    return xsm_default_action(action, current->domain, d);
+}
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 4ce089f..581e712 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -144,6 +144,8 @@ struct xsm_operations {
 #ifdef HAS_MEM_ACCESS
     int (*mem_event_control) (struct domain *d, int mode, int op);
     int (*mem_event_op) (struct domain *d, int op);
+    int (*vm_event_control) (struct domain *d, int mode, int op);
+    int (*vm_event_op) (struct domain *d, int op);
 #endif
 
 #ifdef CONFIG_X86
@@ -553,6 +555,16 @@ static inline int xsm_mem_event_op (xsm_default_t def, struct domain *d, int op)
 {
     return xsm_ops->mem_event_op(d, op);
 }
+
+static inline int xsm_vm_event_control (xsm_default_t def, struct domain *d, int mode, int op)
+{
+    return xsm_ops->vm_event_control(d, mode, op);
+}
+
+static inline int xsm_vm_event_op (xsm_default_t def, struct domain *d, int op)
+{
+    return xsm_ops->vm_event_op(d, op);
+}
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 8eb3050..3cf5126 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -121,6 +121,8 @@ void xsm_fixup_ops (struct xsm_operations *ops)
 #ifdef HAS_MEM_ACCESS
     set_to_dummy_if_null(ops, mem_event_control);
     set_to_dummy_if_null(ops, mem_event_op);
+    set_to_dummy_if_null(ops, vm_event_control);
+    set_to_dummy_if_null(ops, vm_event_op);
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index d48463f..05ebf03 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1212,6 +1212,16 @@ static int flask_mem_event_op(struct domain *d, int op)
 {
     return current_has_perm(d, SECCLASS_HVM, HVM__MEM_EVENT);
 }
+
+static int flask_vm_event_control(struct domain *d, int mode, int op)
+{
+    return current_has_perm(d, SECCLASS_HVM, HVM__VM_EVENT);
+}
+
+static int flask_vm_event_op(struct domain *d, int op)
+{
+    return current_has_perm(d, SECCLASS_HVM, HVM__VM_EVENT);
+}
 #endif /* HAS_MEM_ACCESS */
 
 #ifdef CONFIG_X86
@@ -1599,6 +1609,8 @@ static struct xsm_operations flask_ops = {
 #ifdef HAS_MEM_ACCESS
     .mem_event_control = flask_mem_event_control,
     .mem_event_op = flask_mem_event_op,
+    .vm_event_control = flask_vm_event_control,
+    .vm_event_op = flask_vm_event_op,
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 1da9f63..a4241b5 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -250,6 +250,7 @@ class hvm
     hvmctl
 # XEN_DOMCTL_set_access_required
     mem_event
+    vm_event
 # XEN_DOMCTL_mem_sharing_op and XENMEM_sharing_op_{share,add_physmap} with:
 #  source = the domain making the hypercall
 #  target = domain whose memory is being shared
-- 
2.1.4
