From: Tamas K Lengyel <tamas.lengyel@zentific.com>
To: xen-devel@lists.xen.org
Cc: kevin.tian@intel.com, wei.liu2@citrix.com,
	ian.campbell@citrix.com, steve@zentific.com,
	stefano.stabellini@eu.citrix.com, jun.nakajima@intel.com,
	tim@xen.org, ian.jackson@eu.citrix.com, eddie.dong@intel.com,
	andres@lagarcavilla.org, jbeulich@suse.com,
	Tamas K Lengyel <tamas.lengyel@zentific.com>,
	rshriram@cs.ubc.ca, keir@xen.org, dgdegra@tycho.nsa.gov,
	yanghy@cn.fujitsu.com, rcojocaru@bitdefender.com
Subject: [RFC PATCH V3 05/12] xen: Introduce vm_event
Date: Thu, 29 Jan 2015 22:46:31 +0100
Message-ID: <1422567998-29995-6-git-send-email-tamas.lengyel@zentific.com>
In-Reply-To: <1422567998-29995-1-git-send-email-tamas.lengyel@zentific.com>

To make the mem_event -> vm_event rename easier to review, the process is
broken into three pieces, of which this patch is the first. In this patch
the vm_event subsystem is introduced and hooked into the build process, but
it is not yet used anywhere.
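
As an illustration of where this is headed (not part of this patch, and the
wiring only lands later in the series), a toolstack helper would set up a
monitor ring roughly as follows. Note that xc_vm_event_enable() is
libxc-internal (xc_private.h), so this is a sketch rather than a public API,
and the xch/domain_id variables are assumed from the caller's context:

    uint32_t port;
    void *ring_page;

    /* Hypothetical call site: enable the monitor ring, no introspection. */
    ring_page = xc_vm_event_enable(xch, domain_id,
                                   HVM_PARAM_MONITOR_RING_PFN, &port, 0);
    if ( ring_page == NULL )
        PERROR("Failed to enable vm_event");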

Signed-off-by: Tamas K Lengyel <tamas.lengyel@zentific.com>
---
 MAINTAINERS                         |   1 +
 docs/misc/xsm-flask.txt             |   1 +
 tools/libxc/Makefile                |   1 +
 tools/libxc/xc_private.h            |  12 +
 tools/libxc/xc_vm_event.c           | 162 ++++++++
 xen/common/Makefile                 |   1 +
 xen/common/vm_event.c               | 739 ++++++++++++++++++++++++++++++++++++
 xen/include/public/domctl.h         |  79 ++++
 xen/include/public/vm_event.h       | 193 ++++++++++
 xen/include/xen/sched.h             |  41 ++
 xen/include/xen/vm_event.h          | 143 +++++++
 xen/include/xsm/dummy.h             |  12 +
 xen/include/xsm/xsm.h               |  12 +
 xen/xsm/dummy.c                     |   2 +
 xen/xsm/flask/hooks.c               |  12 +
 xen/xsm/flask/policy/access_vectors |   1 +
 16 files changed, 1412 insertions(+)
 create mode 100644 tools/libxc/xc_vm_event.c
 create mode 100644 xen/common/vm_event.c
 create mode 100644 xen/include/public/vm_event.h
 create mode 100644 xen/include/xen/vm_event.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3bbac9e..cff3e5f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -364,6 +364,7 @@ F:	tools/memshr
 MEMORY EVENT AND ACCESS
 M:	Tim Deegan <tim@xen.org>
 S:	Supported
+F:	xen/common/vm_event.c
 F:	xen/common/mem_event.c
 F:	xen/common/mem_access.c
 
diff --git a/docs/misc/xsm-flask.txt b/docs/misc/xsm-flask.txt
index 9559028..9eead61 100644
--- a/docs/misc/xsm-flask.txt
+++ b/docs/misc/xsm-flask.txt
@@ -87,6 +87,7 @@ __HYPERVISOR_domctl (xen/include/public/domctl.h)
  * XEN_DOMCTL_set_machine_address_size
  * XEN_DOMCTL_debug_op
  * XEN_DOMCTL_gethvmcontext_partial
+ * XEN_DOMCTL_vm_event_op
  * XEN_DOMCTL_mem_event_op
  * XEN_DOMCTL_mem_sharing_op
  * XEN_DOMCTL_setvcpuextstate
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index bd2ca6c..b97e535 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -26,6 +26,7 @@ CTRL_SRCS-y       += xc_pm.c
 CTRL_SRCS-y       += xc_cpu_hotplug.c
 CTRL_SRCS-y       += xc_resume.c
 CTRL_SRCS-y       += xc_tmem.c
+CTRL_SRCS-y       += xc_vm_event.c
 CTRL_SRCS-y       += xc_mem_event.c
 CTRL_SRCS-y       += xc_mem_paging.c
 CTRL_SRCS-y       += xc_mem_access.c
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index f1f601c..58db86d 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -432,4 +432,16 @@ int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
 void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
                           uint32_t *port, int enable_introspection);
 
+/**
+ * vm_event operations. Internal use only.
+ */
+int xc_vm_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
+                        unsigned int mode, uint32_t *port);
+/*
+ * Enables vm_event and returns the mapped ring page indicated by param.
+ * param can be HVM_PARAM_PAGING/MONITOR/SHARING_RING_PFN
+ */
+void *xc_vm_event_enable(xc_interface *xch, domid_t domain_id, int param,
+                         uint32_t *port, int enable_introspection);
+
 #endif /* __XC_PRIVATE_H__ */
diff --git a/tools/libxc/xc_vm_event.c b/tools/libxc/xc_vm_event.c
new file mode 100644
index 0000000..dda766e
--- /dev/null
+++ b/tools/libxc/xc_vm_event.c
@@ -0,0 +1,162 @@
+/******************************************************************************
+ *
+ * xc_vm_event.c
+ *
+ * Interface to low-level VM event functionality.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xc_private.h"
+
+int xc_vm_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
+                         unsigned int mode, uint32_t *port)
+{
+    DECLARE_DOMCTL;
+    int rc;
+
+    /*
+     * No separate domctl number is introduced yet; reuse the mem_event
+     * domctl for now -- this subsystem is not wired up anywhere until the
+     * follow-up patches in this series.
+     */
+    domctl.cmd = XEN_DOMCTL_mem_event_op;
+    domctl.domain = domain_id;
+    domctl.u.vm_event_op.op = op;
+    domctl.u.vm_event_op.mode = mode;
+
+    rc = do_domctl(xch, &domctl);
+    if ( !rc && port )
+        *port = domctl.u.vm_event_op.port;
+    return rc;
+}
+
+void *xc_vm_event_enable(xc_interface *xch, domid_t domain_id, int param,
+                          uint32_t *port, int enable_introspection)
+{
+    void *ring_page = NULL;
+    uint64_t pfn;
+    xen_pfn_t ring_pfn, mmap_pfn;
+    unsigned int op, mode;
+    int rc1, rc2, saved_errno;
+
+    if ( !port )
+    {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    /* Pause the domain for ring page setup */
+    rc1 = xc_domain_pause(xch, domain_id);
+    if ( rc1 != 0 )
+    {
+        PERROR("Unable to pause domain\n");
+        return NULL;
+    }
+
+    /* Get the pfn of the ring page */
+    rc1 = xc_hvm_param_get(xch, domain_id, param, &pfn);
+    if ( rc1 != 0 )
+    {
+        PERROR("Failed to get pfn of ring page\n");
+        goto out;
+    }
+
+    ring_pfn = pfn;
+    mmap_pfn = pfn;
+    ring_page = xc_map_foreign_batch(xch, domain_id, PROT_READ | PROT_WRITE,
+                                     &mmap_pfn, 1);
+    if ( mmap_pfn & XEN_DOMCTL_PFINFO_XTAB )
+    {
+        /* Map failed, populate ring page */
+        rc1 = xc_domain_populate_physmap_exact(xch, domain_id, 1, 0, 0,
+                                              &ring_pfn);
+        if ( rc1 != 0 )
+        {
+            PERROR("Failed to populate ring pfn\n");
+            goto out;
+        }
+
+        mmap_pfn = ring_pfn;
+        ring_page = xc_map_foreign_batch(xch, domain_id, PROT_READ | PROT_WRITE,
+                                         &mmap_pfn, 1);
+        if ( mmap_pfn & XEN_DOMCTL_PFINFO_XTAB )
+        {
+            PERROR("Could not map the ring page\n");
+            goto out;
+        }
+    }
+
+    switch ( param )
+    {
+    case HVM_PARAM_PAGING_RING_PFN:
+        op = XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE;
+        mode = XEN_DOMCTL_VM_EVENT_OP_PAGING;
+        break;
+
+    case HVM_PARAM_MONITOR_RING_PFN:
+        if ( enable_introspection )
+            op = XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION;
+        else
+            op = XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE;
+        mode = XEN_DOMCTL_VM_EVENT_OP_MONITOR;
+        break;
+
+    case HVM_PARAM_SHARING_RING_PFN:
+        op = XEN_DOMCTL_VM_EVENT_OP_SHARING_ENABLE;
+        mode = XEN_DOMCTL_VM_EVENT_OP_SHARING;
+        break;
+
+    /*
+     * This is for the outside chance that the HVM_PARAM is valid but is
+     * not usable as a vm_event ring.
+     */
+    default:
+        errno = EINVAL;
+        rc1 = -1;
+        goto out;
+    }
+
+    rc1 = xc_vm_event_control(xch, domain_id, op, mode, port);
+    if ( rc1 != 0 )
+    {
+        PERROR("Failed to enable vm_event\n");
+        goto out;
+    }
+
+    /* Remove the ring_pfn from the guest's physmap */
+    rc1 = xc_domain_decrease_reservation_exact(xch, domain_id, 1, 0, &ring_pfn);
+    if ( rc1 != 0 )
+        PERROR("Failed to remove ring page from guest physmap");
+
+ out:
+    saved_errno = errno;
+
+    rc2 = xc_domain_unpause(xch, domain_id);
+    if ( rc1 != 0 || rc2 != 0 )
+    {
+        if ( rc2 != 0 )
+        {
+            if ( rc1 == 0 )
+                saved_errno = errno;
+            PERROR("Unable to unpause domain");
+        }
+
+        if ( ring_page )
+            munmap(ring_page, XC_PAGE_SIZE);
+        ring_page = NULL;
+
+        errno = saved_errno;
+    }
+
+    return ring_page;
+}
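
For symmetry, a hedged sketch of the teardown a caller would pair with the
enable path above (the *_DISABLE op/mode values are introduced in the domctl
changes below; the exact call site is hypothetical):

    /* Disable the monitor ring, then drop our mapping of the ring page. */
    xc_vm_event_control(xch, domain_id,
                        XEN_DOMCTL_VM_EVENT_OP_MONITOR_DISABLE,
                        XEN_DOMCTL_VM_EVENT_OP_MONITOR, NULL);
    munmap(ring_page, XC_PAGE_SIZE);
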
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 1956091..0db6967 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -55,6 +55,7 @@ obj-y += lzo.o
 obj-$(HAS_PDX) += pdx.o
 obj-$(HAS_MEM_ACCESS) += mem_access.o
 obj-$(HAS_MEM_ACCESS) += mem_event.o
+obj-$(HAS_MEM_ACCESS) += vm_event.o
 
 obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo unlz4 earlycpio,$(n).init.o)
 
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
new file mode 100644
index 0000000..0db899e
--- /dev/null
+++ b/xen/common/vm_event.c
@@ -0,0 +1,739 @@
+/******************************************************************************
+ * vm_event.c
+ *
+ * VM event support.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/wait.h>
+#include <xen/vm_event.h>
+#include <xen/mem_access.h>
+#include <asm/p2m.h>
+
+#ifdef HAS_MEM_PAGING
+#include <asm/mem_paging.h>
+#endif
+
+#ifdef HAS_MEM_SHARING
+#include <asm/mem_sharing.h>
+#endif
+
+#include <xsm/xsm.h>
+
+/* for public/io/ring.h macros */
+#define xen_mb()   mb()
+#define xen_rmb()  rmb()
+#define xen_wmb()  wmb()
+
+#define vm_event_ring_lock_init(_ved)  spin_lock_init(&(_ved)->ring_lock)
+#define vm_event_ring_lock(_ved)       spin_lock(&(_ved)->ring_lock)
+#define vm_event_ring_unlock(_ved)     spin_unlock(&(_ved)->ring_lock)
+
+static int vm_event_enable(
+    struct domain *d,
+    xen_domctl_vm_event_op_t *vec,
+    struct vm_event_domain *ved,
+    int pause_flag,
+    int param,
+    xen_event_channel_notification_t notification_fn)
+{
+    int rc;
+    unsigned long ring_gfn = d->arch.hvm_domain.params[param];
+
+    /*
+     * Only one helper at a time. If the helper crashed, the ring is in an
+     * undefined state and so is the guest.
+     */
+    if ( ved->ring_page )
+        return -EBUSY;
+
+    /* The parameter defaults to zero, and it should be set to something. */
+    if ( ring_gfn == 0 )
+        return -ENOSYS;
+
+    vm_event_ring_lock_init(ved);
+    vm_event_ring_lock(ved);
+
+    rc = prepare_ring_for_helper(d, ring_gfn, &ved->ring_pg_struct,
+                                    &ved->ring_page);
+    if ( rc < 0 )
+        goto err;
+
+    /* Set the number of currently blocked vCPUs to 0. */
+    ved->blocked = 0;
+
+    /* Allocate event channel */
+    rc = alloc_unbound_xen_event_channel(d->vcpu[0],
+                                         current->domain->domain_id,
+                                         notification_fn);
+    if ( rc < 0 )
+        goto err;
+
+    ved->xen_port = vec->port = rc;
+
+    /* Prepare ring buffer */
+    FRONT_RING_INIT(&ved->front_ring,
+                    (vm_event_sring_t *)ved->ring_page,
+                    PAGE_SIZE);
+
+    /* Save the pause flag for this particular ring. */
+    ved->pause_flag = pause_flag;
+
+    /* Initialize the last-chance wait queue. */
+    init_waitqueue_head(&ved->wq);
+
+    vm_event_ring_unlock(ved);
+    return 0;
+
+ err:
+    destroy_ring_for_helper(&ved->ring_page,
+                            ved->ring_pg_struct);
+    vm_event_ring_unlock(ved);
+
+    return rc;
+}
+
+static unsigned int vm_event_ring_available(struct vm_event_domain *ved)
+{
+    int avail_req = RING_FREE_REQUESTS(&ved->front_ring);
+    avail_req -= ved->target_producers;
+    avail_req -= ved->foreign_producers;
+
+    BUG_ON(avail_req < 0);
+
+    return avail_req;
+}
+
+/*
+ * vm_event_wake_blocked() will wake up vCPUs waiting for room in the
+ * ring. These vCPUs were paused on their way out after placing an event,
+ * but need to be resumed once the ring can process at least one event
+ * from them.
+ */
+static void vm_event_wake_blocked(struct domain *d, struct vm_event_domain *ved)
+{
+    struct vcpu *v;
+    int online = d->max_vcpus;
+    unsigned int avail_req = vm_event_ring_available(ved);
+
+    if ( avail_req == 0 || ved->blocked == 0 )
+        return;
+
+    /*
+     * We ensure that we only have vCPUs online if there are enough free slots
+     * for their memory events to be processed.  This will ensure that no
+     * memory events are lost (due to the fact that certain types of events
+     * cannot be replayed, we need to ensure that there is space in the ring
+     * for when they are hit).
+     * See comment below in vm_event_put_request().
+     */
+    for_each_vcpu ( d, v )
+        if ( test_bit(ved->pause_flag, &v->pause_flags) )
+            online--;
+
+    ASSERT(online == (d->max_vcpus - ved->blocked));
+
+    /*
+     * We remember which vcpu last woke up to avoid always scanning linearly
+     * from zero and starving higher-numbered vcpus under high load.
+     */
+    if ( d->vcpu )
+    {
+        int i, j, k;
+
+        for ( i = ved->last_vcpu_wake_up + 1, j = 0; j < d->max_vcpus; i++, j++ )
+        {
+            k = i % d->max_vcpus;
+            v = d->vcpu[k];
+            if ( !v )
+                continue;
+
+            if ( !(ved->blocked) || online >= avail_req )
+               break;
+
+            if ( test_and_clear_bit(ved->pause_flag, &v->pause_flags) )
+            {
+                vcpu_unpause(v);
+                online++;
+                ved->blocked--;
+                ved->last_vcpu_wake_up = k;
+            }
+        }
+    }
+}
+
+/*
+ * In the event that a vCPU attempted to place an event in the ring and
+ * was unable to do so, it is queued on a wait queue.  These are woken as
+ * needed, and take precedence over the blocked vCPUs.
+ */
+static void vm_event_wake_queued(struct domain *d, struct vm_event_domain *ved)
+{
+    unsigned int avail_req = vm_event_ring_available(ved);
+
+    if ( avail_req > 0 )
+        wake_up_nr(&ved->wq, avail_req);
+}
+
+/*
+ * vm_event_wake() will wake up all vCPUs waiting for the ring to
+ * become available.  If we have queued vCPUs, they get top priority. We
+ * are guaranteed that they will go through code paths that will eventually
+ * call vm_event_wake() again, ensuring that any blocked vCPUs will get
+ * unpaused once all the queued vCPUs have made it through.
+ */
+void vm_event_wake(struct domain *d, struct vm_event_domain *ved)
+{
+    if ( !list_empty(&ved->wq.list) )
+        vm_event_wake_queued(d, ved);
+    else
+        vm_event_wake_blocked(d, ved);
+}
+
+static int vm_event_disable(struct domain *d, struct vm_event_domain *ved)
+{
+    if ( ved->ring_page )
+    {
+        struct vcpu *v;
+
+        vm_event_ring_lock(ved);
+
+        if ( !list_empty(&ved->wq.list) )
+        {
+            vm_event_ring_unlock(ved);
+            return -EBUSY;
+        }
+
+        /* Free domU's event channel and leave the other one unbound */
+        free_xen_event_channel(d->vcpu[0], ved->xen_port);
+
+        /* Unblock all vCPUs */
+        for_each_vcpu ( d, v )
+        {
+            if ( test_and_clear_bit(ved->pause_flag, &v->pause_flags) )
+            {
+                vcpu_unpause(v);
+                ved->blocked--;
+            }
+        }
+
+        destroy_ring_for_helper(&ved->ring_page,
+                                ved->ring_pg_struct);
+        vm_event_ring_unlock(ved);
+    }
+
+    return 0;
+}
+
+static inline void vm_event_release_slot(struct domain *d,
+                                          struct vm_event_domain *ved)
+{
+    /* Update the accounting */
+    if ( current->domain == d )
+        ved->target_producers--;
+    else
+        ved->foreign_producers--;
+
+    /* Kick any waiters */
+    vm_event_wake(d, ved);
+}
+
+/*
+ * vm_event_mark_and_pause() tags a vcpu and puts it to sleep.
+ * The vcpu will resume execution in vm_event_wake_blocked().
+ */
+void vm_event_mark_and_pause(struct vcpu *v, struct vm_event_domain *ved)
+{
+    if ( !test_and_set_bit(ved->pause_flag, &v->pause_flags) )
+    {
+        vcpu_pause_nosync(v);
+        ved->blocked++;
+    }
+}
+
+/*
+ * This must be preceded by a call to claim_slot(), and is guaranteed to
+ * succeed.  As a side-effect however, the vCPU may be paused if the ring is
+ * overly full and its continued execution would cause stalling and excessive
+ * waiting.  The vCPU will be automatically unpaused when the ring clears.
+ */
+void vm_event_put_request(struct domain *d,
+                           struct vm_event_domain *ved,
+                           vm_event_request_t *req)
+{
+    vm_event_front_ring_t *front_ring;
+    int free_req;
+    unsigned int avail_req;
+    RING_IDX req_prod;
+
+    if ( current->domain != d )
+    {
+        req->flags |= VM_EVENT_FLAG_FOREIGN;
+#ifndef NDEBUG
+        if ( !(req->flags & VM_EVENT_FLAG_VCPU_PAUSED) )
+            gdprintk(XENLOG_G_WARNING, "d%dv%d was not paused.\n",
+                     d->domain_id, req->vcpu_id);
+#endif
+    }
+
+    vm_event_ring_lock(ved);
+
+    /* Due to the reservations, this step must succeed. */
+    front_ring = &ved->front_ring;
+    free_req = RING_FREE_REQUESTS(front_ring);
+    ASSERT(free_req > 0);
+
+    /* Copy request */
+    req_prod = front_ring->req_prod_pvt;
+    memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
+    req_prod++;
+
+    /* Update ring */
+    front_ring->req_prod_pvt = req_prod;
+    RING_PUSH_REQUESTS(front_ring);
+
+    /* We've actually *used* our reservation, so release the slot. */
+    vm_event_release_slot(d, ved);
+
+    /*
+     * Give this vCPU a black eye if necessary, on the way out. See the
+     * comments above vm_event_wake_blocked() for more information on how
+     * this mechanism works to avoid waiting.
+     */
+    avail_req = vm_event_ring_available(ved);
+    if ( current->domain == d && avail_req < d->max_vcpus )
+        vm_event_mark_and_pause(current, ved);
+
+    vm_event_ring_unlock(ved);
+
+    notify_via_xen_event_channel(d, ved->xen_port);
+}
+
+int vm_event_get_response(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_response_t *rsp)
+{
+    vm_event_front_ring_t *front_ring;
+    RING_IDX rsp_cons;
+
+    vm_event_ring_lock(ved);
+
+    front_ring = &ved->front_ring;
+    rsp_cons = front_ring->rsp_cons;
+
+    if ( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) )
+    {
+        vm_event_ring_unlock(ved);
+        return 0;
+    }
+
+    /* Copy response */
+    memcpy(rsp, RING_GET_RESPONSE(front_ring, rsp_cons), sizeof(*rsp));
+    rsp_cons++;
+
+    /* Update ring */
+    front_ring->rsp_cons = rsp_cons;
+    front_ring->sring->rsp_event = rsp_cons + 1;
+
+    /*
+     * Kick any waiters -- since we've just consumed an event, there may be
+     * additional space available in the ring.
+     */
+    vm_event_wake(d, ved);
+
+    vm_event_ring_unlock(ved);
+
+    return 1;
+}
+
+void vm_event_cancel_slot(struct domain *d, struct vm_event_domain *ved)
+{
+    vm_event_ring_lock(ved);
+    vm_event_release_slot(d, ved);
+    vm_event_ring_unlock(ved);
+}
+
+static int vm_event_grab_slot(struct vm_event_domain *ved, int foreign)
+{
+    unsigned int avail_req;
+
+    if ( !ved->ring_page )
+        return -ENOSYS;
+
+    vm_event_ring_lock(ved);
+
+    avail_req = vm_event_ring_available(ved);
+    if ( avail_req == 0 )
+    {
+        vm_event_ring_unlock(ved);
+        return -EBUSY;
+    }
+
+    if ( !foreign )
+        ved->target_producers++;
+    else
+        ved->foreign_producers++;
+
+    vm_event_ring_unlock(ved);
+
+    return 0;
+}
+
+/* Simple try_grab wrapper for use in the wait_event() macro. */
+static int vm_event_wait_try_grab(struct vm_event_domain *ved, int *rc)
+{
+    *rc = vm_event_grab_slot(ved, 0);
+    return *rc;
+}
+
+/*
+ * Call vm_event_grab_slot() until the ring is gone or a slot becomes
+ * available.
+ */
+static int vm_event_wait_slot(struct vm_event_domain *ved)
+{
+    int rc = -EBUSY;
+    wait_event(ved->wq, vm_event_wait_try_grab(ved, &rc) != -EBUSY);
+    return rc;
+}
+
+bool_t vm_event_check_ring(struct vm_event_domain *ved)
+{
+    return (ved->ring_page != NULL);
+}
+
+/*
+ * Determines whether or not the current vCPU belongs to the target domain,
+ * and calls the appropriate wait function.  If it is a guest vCPU, then we
+ * use vm_event_wait_slot() to reserve a slot.  As long as there is a ring,
+ * this function will always return 0 for a guest.  For a non-guest, we check
+ * for space and return -EBUSY if the ring is not available.
+ *
+ * Return codes: -ENOSYS: the ring is not yet configured
+ *               -EBUSY: the ring is busy
+ *               0: a spot has been reserved
+ */
+int __vm_event_claim_slot(struct domain *d, struct vm_event_domain *ved,
+                            bool_t allow_sleep)
+{
+    if ( (current->domain == d) && allow_sleep )
+        return vm_event_wait_slot(ved);
+    else
+        return vm_event_grab_slot(ved, (current->domain != d));
+}
+
+#ifdef HAS_MEM_PAGING
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_paging_notification(struct vcpu *v, unsigned int port)
+{
+    if ( likely(v->domain->vm_event->paging.ring_page != NULL) )
+        p2m_mem_paging_resume(v->domain);
+}
+#endif
+
+#ifdef HAS_MEM_ACCESS
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_access_notification(struct vcpu *v, unsigned int port)
+{
+    if ( likely(v->domain->vm_event->monitor.ring_page != NULL) )
+        mem_access_resume(v->domain);
+}
+#endif
+
+#ifdef HAS_MEM_SHARING
+/* Registered with Xen-bound event channel for incoming notifications. */
+static void mem_sharing_notification(struct vcpu *v, unsigned int port)
+{
+    if ( likely(v->domain->vm_event->share.ring_page != NULL) )
+        mem_sharing_sharing_resume(v->domain);
+}
+#endif
+
+int do_vm_event_op(int op, uint32_t domain, void *arg)
+{
+    int ret;
+    struct domain *d;
+
+    ret = rcu_lock_live_remote_domain_by_id(domain, &d);
+    if ( ret )
+        return ret;
+
+    ret = xsm_vm_event_op(XSM_DM_PRIV, d, op);
+    if ( ret )
+        goto out;
+
+    switch ( op )
+    {
+#ifdef HAS_MEM_PAGING
+    case XENMEM_paging_op:
+        ret = mem_paging_memop(d, arg);
+        break;
+#endif
+#ifdef HAS_MEM_SHARING
+    case XENMEM_sharing_op:
+        ret = mem_sharing_memop(d, arg);
+        break;
+#endif
+    default:
+        ret = -ENOSYS;
+    }
+
+ out:
+    rcu_unlock_domain(d);
+    return ret;
+}
+
+/* Clean up on domain destruction */
+void vm_event_cleanup(struct domain *d)
+{
+#ifdef HAS_MEM_PAGING
+    if ( d->vm_event->paging.ring_page )
+    {
+        /*
+         * Destroying the wait queue head means waking up all queued vcpus.
+         * This will drain the list, allowing the disable routine to
+         * complete. It will also drop all domain refs the wait-queued vcpus
+         * are holding. Finally, because this code path involves previously
+         * pausing the domain (domain_kill), unpausing the vcpus causes no
+         * harm.
+         */
+        destroy_waitqueue_head(&d->vm_event->paging.wq);
+        (void)vm_event_disable(d, &d->vm_event->paging);
+    }
+#endif
+#ifdef HAS_MEM_ACCESS
+    if ( d->vm_event->monitor.ring_page )
+    {
+        destroy_waitqueue_head(&d->vm_event->monitor.wq);
+        (void)vm_event_disable(d, &d->vm_event->monitor);
+    }
+#endif
+#ifdef HAS_MEM_SHARING
+    if ( d->vm_event->share.ring_page )
+    {
+        destroy_waitqueue_head(&d->vm_event->share.wq);
+        (void)vm_event_disable(d, &d->vm_event->share);
+    }
+#endif
+}
+
+int vm_event_domctl(struct domain *d, xen_domctl_vm_event_op_t *vec,
+                     XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+{
+    int rc;
+
+    rc = xsm_vm_event_control(XSM_PRIV, d, vec->mode, vec->op);
+    if ( rc )
+        return rc;
+
+    if ( unlikely(d == current->domain) )
+    {
+        gdprintk(XENLOG_INFO, "Tried to do a memory event op on itself.\n");
+        return -EINVAL;
+    }
+
+    if ( unlikely(d->is_dying) )
+    {
+        gdprintk(XENLOG_INFO, "Ignoring memory event op on dying domain %u\n",
+                 d->domain_id);
+        return 0;
+    }
+
+    if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
+    {
+        gdprintk(XENLOG_INFO,
+                 "Memory event op on a domain (%u) with no vcpus\n",
+                 d->domain_id);
+        return -EINVAL;
+    }
+
+    rc = -ENOSYS;
+
+    switch ( vec->mode )
+    {
+#ifdef HAS_MEM_PAGING
+    case XEN_DOMCTL_VM_EVENT_OP_PAGING:
+    {
+        struct vm_event_domain *ved = &d->vm_event->paging;
+        rc = -EINVAL;
+
+        switch ( vec->op )
+        {
+        case XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE:
+        {
+            struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
+            rc = -ENODEV;
+            /* Only HAP is supported */
+            if ( !hap_enabled(d) )
+                break;
+
+            rc = -EMLINK;
+            /* No paging if iommu is used */
+            if ( unlikely(need_iommu(d)) )
+                break;
+
+            rc = -EXDEV;
+            /* Disallow paging in a PoD guest */
+            if ( p2m->pod.entry_count )
+                break;
+
+            rc = vm_event_enable(d, vec, ved, _VPF_mem_paging,
+                                 HVM_PARAM_PAGING_RING_PFN,
+                                 mem_paging_notification);
+        }
+        break;
+
+        case XEN_DOMCTL_VM_EVENT_OP_PAGING_DISABLE:
+        {
+            if ( ved->ring_page )
+                rc = vm_event_disable(d, ved);
+        }
+        break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+    }
+    break;
+#endif
+
+#ifdef HAS_MEM_ACCESS
+    case XEN_DOMCTL_VM_EVENT_OP_MONITOR:
+    {
+        struct vm_event_domain *ved = &d->vm_event->monitor;
+        rc = -EINVAL;
+
+        switch ( vec->op )
+        {
+        case XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE:
+        case XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION:
+        {
+            rc = vm_event_enable(d, vec, ved, _VPF_mem_access,
+                                 HVM_PARAM_MONITOR_RING_PFN,
+                                 mem_access_notification);
+
+            if ( vec->op == XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION
+                 && !rc )
+                p2m_setup_introspection(d);
+        }
+        break;
+
+        case XEN_DOMCTL_VM_EVENT_OP_MONITOR_DISABLE:
+        {
+            if ( ved->ring_page )
+            {
+                rc = vm_event_disable(d, ved);
+                d->arch.hvm_domain.introspection_enabled = 0;
+            }
+        }
+        break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+    }
+    break;
+#endif
+
+#ifdef HAS_MEM_SHARING
+    case XEN_DOMCTL_VM_EVENT_OP_SHARING:
+    {
+        struct vm_event_domain *ved = &d->vm_event->share;
+        rc = -EINVAL;
+
+        switch ( vec->op )
+        {
+        case XEN_DOMCTL_VM_EVENT_OP_SHARING_ENABLE:
+        {
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
+            rc = -ENODEV;
+            /* Only HAP is supported */
+            if ( !hap_enabled(d) )
+                break;
+
+            rc = vm_event_enable(d, vec, ved, _VPF_mem_sharing,
+                                 HVM_PARAM_SHARING_RING_PFN,
+                                 mem_sharing_notification);
+        }
+        break;
+
+        case XEN_DOMCTL_VM_EVENT_OP_SHARING_DISABLE:
+        {
+            if ( ved->ring_page )
+                rc = vm_event_disable(d, ved);
+        }
+        break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+    }
+    break;
+#endif
+
+    default:
+        rc = -ENOSYS;
+    }
+
+    return rc;
+}
+
+void vm_event_vcpu_pause(struct vcpu *v)
+{
+    ASSERT(v == current);
+
+    atomic_inc(&v->vm_event_pause_count);
+    vcpu_pause_nosync(v);
+}
+
+void vm_event_vcpu_unpause(struct vcpu *v)
+{
+    int old, new, prev = v->vm_event_pause_count.counter;
+
+    /*
+     * All unpause requests come as a result of toolstack responses.
+     * Prevent underflow of the vcpu pause count.
+     */
+    do
+    {
+        old = prev;
+        new = old - 1;
+
+        if ( new < 0 )
+        {
+            printk(XENLOG_G_WARNING
+                   "%pv vm_event: Too many unpause attempts\n", v);
+            return;
+        }
+
+        prev = cmpxchg(&v->vm_event_pause_count.counter, old, new);
+    } while ( prev != old );
+
+    vcpu_unpause(v);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
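
As a consumer of the above within Xen, a ring notification handler is
expected to drain responses roughly as follows. This is a sketch modelled on
how mem_access_resume() behaves today, not code from this patch:

    vm_event_response_t rsp;

    /* Pull all queued responses and unpause the vCPUs they refer to. */
    while ( vm_event_get_response(d, &d->vm_event->monitor, &rsp) )
    {
        struct vcpu *v;

        if ( rsp.vcpu_id >= d->max_vcpus )
            continue;
        v = d->vcpu[rsp.vcpu_id];

        if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED )
            vm_event_vcpu_unpause(v);
    }
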
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 596a624..a7d3e94 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -835,6 +835,84 @@ typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t);
 
 /*
+ * VM event operations
+ */
+
+/* XEN_DOMCTL_vm_event_op */
+
+/*
+ * Domain memory paging
+ * Page memory in and out.
+ * Domctl interface to set up and tear down the
+ * pager<->hypervisor interface. Use XENMEM_paging_op*
+ * to perform per-page operations.
+ *
+ * The XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE domctl returns several
+ * non-standard error codes to indicate why paging could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EMLINK - guest has iommu passthrough enabled
+ * EXDEV  - guest has PoD enabled
+ * EBUSY  - guest has or had paging enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_VM_EVENT_OP_PAGING            1
+
+#define XEN_DOMCTL_VM_EVENT_OP_PAGING_ENABLE     0
+#define XEN_DOMCTL_VM_EVENT_OP_PAGING_DISABLE    1
+
+/*
+ * Monitor permissions.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * There are HVM hypercalls to set the per-page access permissions of every
+ * page in a domain.  When one of these permissions--independent, read,
+ * write, and execute--is violated, the VCPU is paused and a memory event
+ * is sent with what happened. (See public/vm_event.h.)
+ *
+ * The memory event handler can then resume the VCPU and redo the access
+ * with a XENMEM_access_op_resume hypercall.
+ *
+ * The XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE domctl returns several
+ * non-standard error codes to indicate why access could not be enabled:
+ * EBUSY  - guest has or had access enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR                        2
+
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE                 0
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR_DISABLE                1
+#define XEN_DOMCTL_VM_EVENT_OP_MONITOR_ENABLE_INTROSPECTION   2
+
+/*
+ * Sharing ENOMEM helper.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * If set up, this ring is used to communicate failed allocations
+ * in the unshare path. XENMEM_sharing_op_resume is used to wake up
+ * vcpus that could not unshare.
+ *
+ * Note that sharing can be turned on (as per the domctl below)
+ * *without* this ring being set up.
+ */
+#define XEN_DOMCTL_VM_EVENT_OP_SHARING           3
+
+#define XEN_DOMCTL_VM_EVENT_OP_SHARING_ENABLE    0
+#define XEN_DOMCTL_VM_EVENT_OP_SHARING_DISABLE   1
+
+/*
+ * Use for teardown/setup of helper<->hypervisor interface for paging,
+ * access and sharing.
+ */
+struct xen_domctl_vm_event_op {
+    uint32_t       op;           /* XEN_DOMCTL_VM_EVENT_OP_*_* */
+    uint32_t       mode;         /* XEN_DOMCTL_VM_EVENT_OP_* */
+
+    uint32_t       port;         /* OUT: event channel for ring */
+};
+typedef struct xen_domctl_vm_event_op xen_domctl_vm_event_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vm_event_op_t);
+
+/*
  * Memory sharing operations
  */
 /* XEN_DOMCTL_mem_sharing_op.
@@ -1123,6 +1201,7 @@ struct xen_domctl {
         struct xen_domctl_set_target        set_target;
         struct xen_domctl_subscribe         subscribe;
         struct xen_domctl_debug_op          debug_op;
+        struct xen_domctl_vm_event_op       vm_event_op;
         struct xen_domctl_mem_event_op      mem_event_op;
         struct xen_domctl_mem_sharing_op    mem_sharing_op;
 #if defined(__i386__) || defined(__x86_64__)
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
new file mode 100644
index 0000000..8fba3d1b
--- /dev/null
+++ b/xen/include/public/vm_event.h
@@ -0,0 +1,193 @@
+/******************************************************************************
+ * vm_event.h
+ *
+ * VM event common structures.
+ *
+ * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_PUBLIC_VM_EVENT_H
+#define _XEN_PUBLIC_VM_EVENT_H
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#include "xen.h"
+#include "io/ring.h"
+
+#define VM_EVENT_INTERFACE_VERSION 0x00000001
+
+/* Memory event flags */
+#define VM_EVENT_FLAG_VCPU_PAUSED     (1 << 0)
+#define VM_EVENT_FLAG_DROP_PAGE       (1 << 1)
+#define VM_EVENT_FLAG_EVICT_FAIL      (1 << 2)
+#define VM_EVENT_FLAG_FOREIGN         (1 << 3)
+#define VM_EVENT_FLAG_DUMMY           (1 << 4)
+/*
+ * Emulate the fault-causing instruction (if set in the event response flags).
+ * This will allow the guest to continue execution without lifting the page
+ * access restrictions.
+ */
+#define VM_EVENT_FLAG_EMULATE         (1 << 5)
+/*
+ * Same as VM_EVENT_FLAG_EMULATE, but with write operations or operations
+ * potentially having side effects (like memory mapped or port I/O) disabled.
+ */
+#define VM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6)
+
+/* Reasons for the vm event request */
+/* Default case */
+#define VM_EVENT_REASON_UNKNOWN                 0
+/* Memory access violation */
+#define VM_EVENT_REASON_MEM_ACCESS              1
+/* Memory sharing event */
+#define VM_EVENT_REASON_MEM_SHARING             2
+/* Memory paging event */
+#define VM_EVENT_REASON_MEM_PAGING              3
+/* CR0 was updated */
+#define VM_EVENT_REASON_MOV_TO_CR0              4
+/* CR3 was updated */
+#define VM_EVENT_REASON_MOV_TO_CR3              5
+/* CR4 was updated */
+#define VM_EVENT_REASON_MOV_TO_CR4              6
+/* Debug operation executed (int3) */
+#define VM_EVENT_REASON_SOFTWARE_BREAKPOINT     7
+/* Single-step (MTF) */
+#define VM_EVENT_REASON_SINGLESTEP              8
+/* An MSR was updated. Does NOT honour HVMPME_onchangeonly */
+#define VM_EVENT_REASON_MOV_TO_MSR              9
+
+/* Using a custom struct (not hvm_hw_cpu) so as to not fill
+ * the vm_event ring buffer too quickly. */
+struct vm_event_regs_x86 {
+    uint64_t rax;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbx;
+    uint64_t rsp;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+    uint64_t rflags;
+    uint64_t dr7;
+    uint64_t rip;
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    uint64_t msr_efer;
+    uint64_t msr_star;
+    uint64_t msr_lstar;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint32_t cs_arbytes;
+    uint32_t _pad;
+};
+
+struct vm_event_mem_access_data {
+    uint64_t gfn;
+    uint64_t offset;
+    uint64_t gla; /* if gla_valid */
+    uint8_t access_r;
+    uint8_t access_w;
+    uint8_t access_x;
+    uint8_t gla_valid;
+    uint8_t fault_with_gla;
+    uint8_t fault_in_gpt;
+    uint16_t _pad;
+};
+
+struct vm_event_mov_to_cr_data {
+    uint64_t new_value;
+    uint64_t old_value;
+};
+
+struct vm_event_software_breakpoint_data {
+    uint64_t gfn;
+};
+
+struct vm_event_singlestep_data {
+    uint64_t gfn;
+};
+
+struct vm_event_mov_to_msr_data {
+    uint64_t msr;
+    uint64_t value;
+};
+
+struct vm_event_paging_data {
+    uint64_t gfn;
+    uint32_t p2mt;
+    uint32_t _pad;
+};
+
+struct vm_event_sharing_data {
+    uint64_t gfn;
+    uint32_t p2mt;
+    uint32_t _pad;
+};
+
+typedef struct vm_event_st {
+    uint32_t version; /* VM_EVENT_INTERFACE_VERSION */
+    uint32_t flags;
+    uint32_t vcpu_id;
+    uint32_t reason; /* VM_EVENT_REASON_* */
+
+    union {
+        struct vm_event_paging_data                mem_paging;
+        struct vm_event_sharing_data               mem_sharing;
+        struct vm_event_mem_access_data            mem_access;
+        struct vm_event_mov_to_cr_data             mov_to_cr;
+        struct vm_event_mov_to_msr_data            mov_to_msr;
+        struct vm_event_software_breakpoint_data   software_breakpoint;
+        struct vm_event_singlestep_data            singlestep;
+    } data;
+
+    union {
+        struct vm_event_regs_x86 x86;
+    } regs;
+} vm_event_request_t, vm_event_response_t;
+
+DEFINE_RING_TYPES(vm_event, vm_event_request_t, vm_event_response_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+#endif /* _XEN_PUBLIC_VM_EVENT_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
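
On the helper side, the DEFINE_RING_TYPES() invocation above also generates
the back ring used by the toolstack. Consumption follows the standard
io/ring.h pattern; a hedged sketch, with ring_page as returned by
xc_vm_event_enable():

    vm_event_back_ring_t back_ring;

    SHARED_RING_INIT((vm_event_sring_t *)ring_page);
    BACK_RING_INIT(&back_ring, (vm_event_sring_t *)ring_page, XC_PAGE_SIZE);

    while ( RING_HAS_UNCONSUMED_REQUESTS(&back_ring) )
    {
        vm_event_request_t req;
        RING_IDX cons = back_ring.req_cons;

        memcpy(&req, RING_GET_REQUEST(&back_ring, cons), sizeof(req));
        back_ring.req_cons = ++cons;
        back_ring.sring->req_event = cons + 1;

        /* ... handle req, then queue a vm_event_response_t ... */
    }
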
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 64a2bd3..681efa9 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -24,6 +24,7 @@
 #include <public/sysctl.h>
 #include <public/vcpu.h>
 #include <public/mem_event.h>
+#include <public/vm_event.h>
 #include <public/event_channel.h>
 
 #ifdef CONFIG_COMPAT
@@ -216,6 +217,8 @@ struct vcpu
 
     /* VCPU paused for mem_event replies. */
     atomic_t         mem_event_pause_count;
+    /* VCPU paused for vm_event replies. */
+    atomic_t         vm_event_pause_count;
     /* VCPU paused by system controller. */
     int              controller_pause_count;
 
@@ -292,6 +295,41 @@ struct mem_event_per_domain
     struct mem_event_domain monitor;
 };
 
+/* VM event */
+struct vm_event_domain
+{
+    /* ring lock */
+    spinlock_t ring_lock;
+    /* The ring has 64 entries; unsigned char suffices for these counts */
+    unsigned char foreign_producers;
+    unsigned char target_producers;
+    /* shared ring page */
+    void *ring_page;
+    struct page_info *ring_pg_struct;
+    /* front-end ring */
+    vm_event_front_ring_t front_ring;
+    /* event channel port (vcpu0 only) */
+    int xen_port;
+    /* vm_event bit for vcpu->pause_flags */
+    int pause_flag;
+    /* list of vcpus waiting for room in the ring */
+    struct waitqueue_head wq;
+    /* the number of vCPUs blocked */
+    unsigned int blocked;
+    /* The last vcpu woken up */
+    unsigned int last_vcpu_wake_up;
+};
+
+struct vm_event_per_domain
+{
+    /* Memory sharing support */
+    struct vm_event_domain share;
+    /* Memory paging support */
+    struct vm_event_domain paging;
+    /* VM event monitor support */
+    struct vm_event_domain monitor;
+};
+
 struct evtchn_port_ops;
 
 /*
@@ -445,6 +483,9 @@ struct domain
     /* Various mem_events */
     struct mem_event_per_domain *mem_event;
 
+    /* Various vm_events */
+    struct vm_event_per_domain *vm_event;
+
     /*
      * Can be specified by the user. If that is not the case, it is
      * computed from the union of all the vcpu cpu-affinity masks.
diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
new file mode 100644
index 0000000..988ea42
--- /dev/null
+++ b/xen/include/xen/vm_event.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ * vm_event.h
+ *
+ * Common interface for vm event support.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __VM_EVENT_H__
+#define __VM_EVENT_H__
+
+#include <xen/sched.h>
+
+#ifdef HAS_MEM_ACCESS
+
+/* Clean up on domain destruction */
+void vm_event_cleanup(struct domain *d);
+
+/* Returns whether a ring has been set up */
+bool_t vm_event_check_ring(struct vm_event_domain *ved);
+
+/*
+ * Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is no
+ * available space and the caller is a foreign domain. If the guest itself
+ * is the caller, -EBUSY is avoided by sleeping on a wait queue to ensure
+ * that the ring does not lose future events.
+ *
+ * However, the allow_sleep flag can be set to false in cases in which it is
+ * ok to lose future events, and thus -EBUSY can be returned to guest vcpus
+ * (handle with care!).
+ *
+ * In general, you must follow a claim_slot() call with either put_request()
+ * or cancel_slot(), both of which are guaranteed to succeed.
+ */
+int __vm_event_claim_slot(struct domain *d, struct vm_event_domain *ved,
+                          bool_t allow_sleep);
+static inline int vm_event_claim_slot(struct domain *d,
+                                      struct vm_event_domain *ved)
+{
+    return __vm_event_claim_slot(d, ved, 1);
+}
+
+static inline int vm_event_claim_slot_nosleep(struct domain *d,
+                                              struct vm_event_domain *ved)
+{
+    return __vm_event_claim_slot(d, ved, 0);
+}
+
+void vm_event_cancel_slot(struct domain *d, struct vm_event_domain *ved);
+
+void vm_event_put_request(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_request_t *req);
+
+int vm_event_get_response(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_response_t *rsp);
+
+int do_vm_event_op(int op, uint32_t domain, void *arg);
+int vm_event_domctl(struct domain *d, xen_domctl_vm_event_op_t *vec,
+                    XEN_GUEST_HANDLE_PARAM(void) u_domctl);
+
+void vm_event_vcpu_pause(struct vcpu *v);
+void vm_event_vcpu_unpause(struct vcpu *v);
+
+#else
+
+static inline void vm_event_cleanup(struct domain *d) {}
+
+static inline bool_t vm_event_check_ring(struct vm_event_domain *ved)
+{
+    return 0;
+}
+
+static inline int vm_event_claim_slot(struct domain *d,
+                                      struct vm_event_domain *ved)
+{
+    return -ENOSYS;
+}
+
+static inline int vm_event_claim_slot_nosleep(struct domain *d,
+                                              struct vm_event_domain *ved)
+{
+    return -ENOSYS;
+}
+
+static inline
+void vm_event_cancel_slot(struct domain *d, struct vm_event_domain *ved)
+{}
+
+static inline
+void vm_event_put_request(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_request_t *req)
+{}
+
+static inline
+int vm_event_get_response(struct domain *d, struct vm_event_domain *ved,
+                          vm_event_response_t *rsp)
+{
+    return -ENOSYS;
+}
+
+static inline int do_vm_event_op(int op, uint32_t domain, void *arg)
+{
+    return -ENOSYS;
+}
+
+static inline
+int vm_event_domctl(struct domain *d, xen_domctl_vm_event_op_t *vec,
+                    XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+{
+    return -ENOSYS;
+}
+
+static inline void vm_event_vcpu_pause(struct vcpu *v) {}
+static inline void vm_event_vcpu_unpause(struct vcpu *v) {}
+
+#endif /* HAS_MEM_ACCESS */
+
+#endif /* __VM_EVENT_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
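
To make the claim_slot()/put_request() contract above concrete, an
in-hypervisor producer is expected to look roughly like the sketch below.
This is illustrative only -- the actual call sites are added by later
patches in the series, and v here is assumed to be the current vCPU:

    vm_event_request_t req = {
        .reason  = VM_EVENT_REASON_MEM_ACCESS,
        .vcpu_id = v->vcpu_id,
    };

    /* Reserve a slot first; a guest vcpu may sleep on the wait queue. */
    if ( vm_event_claim_slot(d, &d->vm_event->monitor) )
        return;

    /* Pause the vCPU until the toolstack responds, and flag it as such. */
    vm_event_vcpu_pause(v);
    req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;

    vm_event_put_request(d, &d->vm_event->monitor, &req);
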
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index f20e89c..d6d403a 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -525,6 +525,18 @@ static XSM_INLINE int xsm_mem_event_op(XSM_DEFAULT_ARG struct domain *d, int op)
     XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
+
+static XSM_INLINE int xsm_vm_event_control(XSM_DEFAULT_ARG struct domain *d, int mode, int op)
+{
+    XSM_ASSERT_ACTION(XSM_PRIV);
+    return xsm_default_action(action, current->domain, d);
+}
+
+static XSM_INLINE int xsm_vm_event_op(XSM_DEFAULT_ARG struct domain *d, int op)
+{
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
+    return xsm_default_action(action, current->domain, d);
+}
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 4ce089f..581e712 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -144,6 +144,8 @@ struct xsm_operations {
 #ifdef HAS_MEM_ACCESS
     int (*mem_event_control) (struct domain *d, int mode, int op);
     int (*mem_event_op) (struct domain *d, int op);
+    int (*vm_event_control) (struct domain *d, int mode, int op);
+    int (*vm_event_op) (struct domain *d, int op);
 #endif
 
 #ifdef CONFIG_X86
@@ -553,6 +555,16 @@ static inline int xsm_mem_event_op (xsm_default_t def, struct domain *d, int op)
 {
     return xsm_ops->mem_event_op(d, op);
 }
+
+static inline int xsm_vm_event_control (xsm_default_t def, struct domain *d, int mode, int op)
+{
+    return xsm_ops->vm_event_control(d, mode, op);
+}
+
+static inline int xsm_vm_event_op (xsm_default_t def, struct domain *d, int op)
+{
+    return xsm_ops->vm_event_op(d, op);
+}
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 8eb3050..3cf5126 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -121,6 +121,8 @@ void xsm_fixup_ops (struct xsm_operations *ops)
 #ifdef HAS_MEM_ACCESS
     set_to_dummy_if_null(ops, mem_event_control);
     set_to_dummy_if_null(ops, mem_event_op);
+    set_to_dummy_if_null(ops, vm_event_control);
+    set_to_dummy_if_null(ops, vm_event_op);
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index d48463f..05ebf03 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1212,6 +1212,16 @@ static int flask_mem_event_op(struct domain *d, int op)
 {
     return current_has_perm(d, SECCLASS_HVM, HVM__MEM_EVENT);
 }
+
+static int flask_vm_event_control(struct domain *d, int mode, int op)
+{
+    return current_has_perm(d, SECCLASS_HVM, HVM__VM_EVENT);
+}
+
+static int flask_vm_event_op(struct domain *d, int op)
+{
+    return current_has_perm(d, SECCLASS_HVM, HVM__VM_EVENT);
+}
 #endif /* HAS_MEM_ACCESS */
 
 #ifdef CONFIG_X86
@@ -1599,6 +1609,8 @@ static struct xsm_operations flask_ops = {
 #ifdef HAS_MEM_ACCESS
     .mem_event_control = flask_mem_event_control,
     .mem_event_op = flask_mem_event_op,
+    .vm_event_control = flask_vm_event_control,
+    .vm_event_op = flask_vm_event_op,
 #endif
 
 #ifdef CONFIG_X86
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 1da9f63..a4241b5 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -250,6 +250,7 @@ class hvm
     hvmctl
 # XEN_DOMCTL_set_access_required
     mem_event
+    vm_event
 # XEN_DOMCTL_mem_sharing_op and XENMEM_sharing_op_{share,add_physmap} with:
 #  source = the domain making the hypercall
 #  target = domain whose memory is being shared
-- 
2.1.4
