All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2 of 6] REDO: mem_access & mem_access 2: mem event additions for access
@ 2011-01-04 22:07 Joe Epstein
  2011-01-05 14:15 ` Tim Deegan
  0 siblings, 1 reply; 4+ messages in thread
From: Joe Epstein @ 2011-01-04 22:07 UTC (permalink / raw)
  To: xen-devel

* Adds an ACCESS memory event type, with RESUME as the action.

* Refactors the bits in the memory event to store whether the memory event
  was a read, write, or execute (for access memory events only).  I used
  bits sparingly to keep the structure somewhat the same size.

* Modified VMX to report the needed information in its nested page fault.
  SVM is not implemented in this patch series.

Signed-off-by: Joe Epstein <jepstein98@gmail.com>


diff -r a3cec4b94150 -r 06b0916eb91d xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/include/public/domctl.h	Tue Jan 04 11:59:48 2011 -0800
@@ -714,7 +714,7 @@
 /*
  * Page memory in and out.
  */
-#define XEN_DOMCTL_MEM_EVENT_OP_PAGING (1 << 0)
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING            1

 /* Domain memory paging */
 #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE   0
@@ -722,6 +722,12 @@
 #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP       2
 #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME     3

+/*
+ * Access permissions
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS            2
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_RESUME     0
+
 struct xen_domctl_mem_event_op {
     uint32_t       op;           /* XEN_DOMCTL_MEM_EVENT_OP_* */
     uint32_t       mode;         /* XEN_DOMCTL_MEM_EVENT_ENABLE_* */
diff -r a3cec4b94150 -r 06b0916eb91d xen/include/public/mem_event.h
--- a/xen/include/public/mem_event.h	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/include/public/mem_event.h	Tue Jan 04 11:59:48 2011 -0800
@@ -26,18 +26,40 @@
 #include "xen.h"
 #include "io/ring.h"

+/* Memory event type */
+#define MEM_EVENT_TYPE_SHARED   0
+#define MEM_EVENT_TYPE_PAGING   1
+#define MEM_EVENT_TYPE_ACCESS   2
+
 /* Memory event flags */
 #define MEM_EVENT_FLAG_VCPU_PAUSED  (1 << 0)

+/* Reasons for the memory event request */
+#define MEM_EVENT_REASON_UNKNOWN     0    /* typical reason */
+#define MEM_EVENT_REASON_VIOLATION   1    /* access violation, GFN is
address */
+
 typedef struct mem_event_shared_page {
     uint32_t port;
 } mem_event_shared_page_t;

 typedef struct mem_event_st {
+    uint16_t type;
+    uint16_t flags;
+    uint32_t vcpu_id;
+
     uint64_t gfn;
+    uint64_t offset;
+    uint64_t gla; /* if gla_valid */
+
     uint32_t p2mt;
-    uint32_t vcpu_id;
-    uint64_t flags;
+
+    uint16_t access_r:1;
+    uint16_t access_w:1;
+    uint16_t access_x:1;
+    uint16_t gla_valid:1;
+    uint16_t available:12;
+
+    uint16_t reason;
 } mem_event_request_t, mem_event_response_t;

 DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t);
diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/include/asm-x86/mem_event.h	Tue Jan 04 11:59:48 2011 -0800
@@ -24,6 +24,8 @@
 #ifndef __MEM_EVENT_H__
 #define __MEM_EVENT_H__

+/* Returns true if a listener exists, else pauses VCPU */
+int mem_event_check_listener(struct domain *d);
 int mem_event_check_ring(struct domain *d);
 void mem_event_put_request(struct domain *d, mem_event_request_t *req);
 void mem_event_get_response(struct domain *d, mem_event_response_t *rsp);
diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/include/asm-x86/p2m.h	Tue Jan 04 11:59:48 2011 -0800
@@ -522,6 +522,14 @@
 { }
 #endif

+/* Send mem event based on the access (gla is -1ull if not available),
+ * return true if the event will be taken care of by a mem event
listener.  Handles
+ * rw2rx conversion */
+int p2m_mem_access_check(unsigned long gpa, bool_t gla_valid,
unsigned long gla,
+                         bool_t access_r, bool_t access_w, bool_t access_x);
+/* Resumes the running of the VCPU, restarting the last instruction */
+void p2m_mem_access_resume(struct p2m_domain *p2m);
+
 struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);

 #endif /* _XEN_P2M_H */
diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/arch/x86/hvm/hvm.c	Tue Jan 04 11:59:48 2011 -0800
@@ -61,6 +61,8 @@
 #include <public/hvm/ioreq.h>
 #include <public/version.h>
 #include <public/memory.h>
+#include <asm/mem_event.h>
+#include <public/mem_event.h>

 bool_t __read_mostly hvm_enabled;

@@ -1086,61 +1088,91 @@
     domain_shutdown(v->domain, SHUTDOWN_reboot);
 }

-bool_t hvm_hap_nested_page_fault(unsigned long gfn)
+bool_t hvm_hap_nested_page_fault(unsigned long gpa,
+                                 bool_t gla_valid,
+                                 unsigned long gla,
+                                 bool_t access_r,
+                                 bool_t access_w,
+                                 bool_t access_x)
 {
+    unsigned long gfn = gpa >> PAGE_SHIFT;
     p2m_type_t p2mt;
     mfn_t mfn;
     struct vcpu *v = current;
     struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
+    int guest_fault = 0;

     mfn = gfn_to_mfn_guest(p2m, gfn, &p2mt);

-    /*
-     * If this GFN is emulated MMIO or marked as read-only, pass the fault
-     * to the mmio handler.
-     */
-    if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
+#ifdef __x86_64__
+    /* Check if the page has been paged out */
+    if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) )
     {
-        if ( !handle_mmio() )
-            hvm_inject_exception(TRAP_gp_fault, 0, 0);
+        p2m_mem_paging_populate(p2m, gfn);
         return 1;
     }

-#ifdef __x86_64__
-    /* Check if the page has been paged out */
-    if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) )
-        p2m_mem_paging_populate(p2m, gfn);
-
     /* Mem sharing: unshare the page and try again */
-    if ( p2mt == p2m_ram_shared )
+    if ( p2mt == p2m_ram_shared && access_w )
     {
         mem_sharing_unshare_page(p2m, gfn, 0);
         return 1;
     }
 #endif
-
-    /* Spurious fault? PoD and log-dirty also take this path. */
-    if ( p2m_is_ram(p2mt) )
+
+    /*
+     * If this GFN is emulated MMIO or marked as read-only (which old
MMIO is),
+     * pass the fault to the mmio handler first.
+     */
+    if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
     {
-        /*
-         * Page log dirty is always done with order 0. If this mfn resides in
-         * a large page, we do not change other pages type within that large
-         * page.
-         */
-        paging_mark_dirty(v->domain, mfn_x(mfn));
-        p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw);
+        if ( !handle_mmio() )
+        {
+            guest_fault = 1;
+            goto check_access_handler;
+        }
         return 1;
     }

-    /* Shouldn't happen: Maybe the guest was writing to a r/o grant mapping? */
-    if ( p2mt == p2m_grant_map_ro )
+    /* Was it a write access: log-dirty, etc... */
+    if ( access_w ) {
+        /* PoD and log-dirty also take this path. */
+        if ( p2mt != p2m_ram_rw && p2m_is_ram(p2mt) )
+        {
+            /*
+             * Page log dirty is always done with order 0. If this
mfn resides in
+             * a large page, we do not change other pages type within
that large
+             * page.
+             */
+            paging_mark_dirty(v->domain, mfn_x(mfn));
+            p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw);
+            return 1;
+        }
+
+        /* Shouldn't happen: Maybe the guest was writing to a r/o
grant mapping? */
+        if ( p2mt == p2m_grant_map_ro )
+        {
+            gdprintk(XENLOG_WARNING,
+                     "trying to write to read-only grant mapping\n");
+            guest_fault = 1;
+            goto check_access_handler;
+        }
+    } /* end access_w */
+
+ check_access_handler:
+    /* Even if it is the guest's "fault", check with the mem_event
interface instead if
+     * one is there */
+    if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r,
access_w, access_x) )
+        return 1;
+
+    /* If there is no handler, then fault if guest_fault = 1 */
+    if ( guest_fault )
     {
-        gdprintk(XENLOG_WARNING,
-                 "trying to write to read-only grant mapping\n");
         hvm_inject_exception(TRAP_gp_fault, 0, 0);
         return 1;
     }

+    /* Nothing handled it: it must be an access error with no memory
handler, so fail */
     return 0;
 }

diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/arch/x86/hvm/svm/svm.c	Tue Jan 04 11:59:48 2011 -0800
@@ -979,7 +979,7 @@
         __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
     }

-    if ( hvm_hap_nested_page_fault(gfn) )
+    if ( hvm_hap_nested_page_fault(gpa, 0, ~0ull, 0, 0, 0) )
         return;

     /* Everything else is an error. */
diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Jan 04 11:59:48 2011 -0800
@@ -2079,7 +2079,13 @@
         __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
     }

-    if ( hvm_hap_nested_page_fault(gfn) )
+    if ( hvm_hap_nested_page_fault(gpa,
+                                   qualification & EPT_GLA_VALID       ? 1 : 0,
+                                   qualification & EPT_GLA_VALID
+                                     ? __vmread(GUEST_LINEAR_ADDRESS) : ~0ull,
+                                   qualification & EPT_READ_VIOLATION  ? 1 : 0,
+                                   qualification & EPT_WRITE_VIOLATION ? 1 : 0,
+                                   qualification & EPT_EXEC_VIOLATION
 ? 1 : 0) )
         return;

     /* Everything else is an error. */
diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/Makefile
--- a/xen/arch/x86/mm/Makefile	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/arch/x86/mm/Makefile	Tue Jan 04 11:59:48 2011 -0800
@@ -9,6 +9,7 @@
 obj-$(x86_64) += mem_event.o
 obj-$(x86_64) += mem_paging.o
 obj-$(x86_64) += mem_sharing.o
+obj-$(x86_64) += mem_access.o

 guest_walk_%.o: guest_walk.c Makefile
 	$(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/mem_access.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/mem_access.c	Tue Jan 04 11:59:48 2011 -0800
@@ -0,0 +1,59 @@
+/******************************************************************************
+ * arch/x86/mm/mem_access.c
+ *
+ * Memory access support.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <asm/p2m.h>
+#include <asm/mem_event.h>
+
+
+int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
+                      XEN_GUEST_HANDLE(void) u_domctl)
+{
+    int rc;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    switch( mec->op )
+    {
+    case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_RESUME:
+    {
+        p2m_mem_access_resume(p2m);
+        rc = 0;
+    }
+    break;
+
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
+}
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/arch/x86/mm/mem_event.c	Tue Jan 04 11:59:48 2011 -0800
@@ -26,6 +26,7 @@
 #include <asm/p2m.h>
 #include <asm/mem_event.h>
 #include <asm/mem_paging.h>
+#include <asm/mem_access.h>

 /* for public/io/ring.h macros */
 #define xen_mb()   mb()
@@ -67,6 +68,9 @@

     mem_event_ring_lock_init(d);

+    /* Wake any VCPUs paused for memory events */
+    mem_event_unpause_vcpus(d);
+
     return 0;

  err_shared:
@@ -143,12 +147,32 @@
             vcpu_wake(v);
 }

+int mem_event_check_listener(struct domain *d) {
+    struct vcpu *curr = current;
+
+    /* If a listener exists, return */
+    if ( d->mem_event.ring_page )
+        return 1;
+
+    /* Sleep the VCPU */
+    if ( (curr->domain->domain_id == d->domain_id) )
+    {
+        set_bit(_VPF_mem_event, &curr->pause_flags);
+        vcpu_sleep_nosync(curr);
+    }
+
+    return 0;
+}
+
 int mem_event_check_ring(struct domain *d)
 {
     struct vcpu *curr = current;
     int free_requests;
     int ring_full;

+    if ( !d->mem_event.ring_page )
+        return -1;
+
     mem_event_ring_lock(d);

     free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring);
@@ -157,7 +181,7 @@
         gdprintk(XENLOG_INFO, "free request slots: %d\n", free_requests);
         WARN_ON(free_requests == 0);
     }
-    ring_full = free_requests < MEM_EVENT_RING_THRESHOLD;
+    ring_full = free_requests < MEM_EVENT_RING_THRESHOLD ? 1 : 0;

     if ( (curr->domain->domain_id == d->domain_id) && ring_full )
     {
@@ -203,7 +227,11 @@
         return rc;
 #endif

-    if ( mec->mode == 0 )
+    rc = -ENOSYS;
+
+    switch ( mec-> mode )
+    {
+    case 0:
     {
         switch( mec->op )
         {
@@ -268,13 +296,18 @@
             rc = -ENOSYS;
             break;
         }
+        break;
     }
-    else
+    case XEN_DOMCTL_MEM_EVENT_OP_PAGING:
     {
-        rc = -ENOSYS;
-
-        if ( mec->mode & XEN_DOMCTL_MEM_EVENT_OP_PAGING )
-            rc = mem_paging_domctl(d, mec, u_domctl);
+        rc = mem_paging_domctl(d, mec, u_domctl);
+        break;
+    }
+    case XEN_DOMCTL_MEM_EVENT_OP_ACCESS:
+    {
+        rc = mem_access_domctl(d, mec, u_domctl);
+        break;
+    }
     }

     return rc;
diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/arch/x86/mm/p2m.c	Tue Jan 04 11:59:48 2011 -0800
@@ -2853,6 +2853,98 @@
 }
 #endif /* __x86_64__ */

+int p2m_mem_access_check(unsigned long gpa, bool_t gla_valid,
unsigned long gla,
+                         bool_t access_r, bool_t access_w, bool_t access_x)
+{
+    struct vcpu *v = current;
+    mem_event_request_t req;
+    unsigned long gfn = gpa >> PAGE_SHIFT;
+    struct domain *d = v->domain;
+    struct p2m_domain* p2m = p2m_get_hostp2m(d);
+    int res;
+    mfn_t mfn;
+    p2m_type_t p2mt;
+    p2m_access_t p2ma;
+
+    /* First, handle rx2rw conversion automatically */
+    mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query);
+
+    if ( access_w && p2ma == p2m_access_rx2rw ) {
+        p2m_lock(p2m);
+        p2m->set_entry(p2m, gfn, mfn, 0, p2mt, p2m_access_rw);
+        p2m_unlock(p2m);
+
+        return 1;  /* handled */
+    }
+
+    /* Otherwise, check if there is a memory event listener, and send
the message along */
+    res = mem_event_check_ring(d);
+    if ( res < 0 )
+    {
+        /* No listener */
+        if ( p2m->access_required )
+        {
+            printk(XENLOG_INFO
+                   "Memory access permissions failure, no mem_event
listener: pausing VCPU %d, dom %d\n",
+                   v->vcpu_id, d->domain_id);
+
+            /* Will pause the VCPU */
+            (void) mem_event_check_listener(d);
+        }
+        else
+        {
+            /* A listener is not required, so clear the access restrictions */
+            p2m_lock(p2m);
+            p2m->set_entry(p2m, gfn, mfn, 0, p2mt, p2m_access_rwx);
+            p2m_unlock(p2m);
+        }
+
+        return 1;
+    }
+    else if ( res > 0 )
+        return 1;  /* No space in buffer */
+
+    memset(&req, 0, sizeof(req));
+    req.type = MEM_EVENT_TYPE_ACCESS;
+    req.reason = MEM_EVENT_REASON_VIOLATION;
+
+    /* Pause the current VCPU unconditionally */
+    vcpu_pause_nosync(v);
+    req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+
+    /* Send request to mem event */
+    req.gfn = gfn;
+    req.offset = gpa & ((1 << PAGE_SHIFT) - 1);
+    req.gla_valid = gla_valid;
+    req.gla = gla;
+    req.access_r = access_r;
+    req.access_w = access_w;
+    req.access_x = access_x;
+
+    req.vcpu_id = v->vcpu_id;
+
+    mem_event_put_request(d, &req);
+
+    /* VCPU paused, mem event request sent */
+    return 1;
+}
+
+void p2m_mem_access_resume(struct p2m_domain *p2m)
+{
+    struct domain *d = p2m->domain;
+    mem_event_response_t rsp;
+
+    mem_event_get_response(d, &rsp);
+
+    /* Unpause domain */
+    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+
+    /* Unpause any domains that were paused because the ring was full
or no listener
+     * was available */
+    mem_event_unpause_vcpus(d);
+}
+
 /*
  * Local variables:
  * mode: C
diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h	Tue Jan 04 10:30:15 2011 -0800
+++ b/xen/include/asm-x86/hvm/hvm.h	Tue Jan 04 11:59:48 2011 -0800
@@ -356,7 +356,10 @@

 int hvm_debug_op(struct vcpu *v, int32_t op);

-bool_t hvm_hap_nested_page_fault(unsigned long gfn);
+bool_t hvm_hap_nested_page_fault(unsigned long gpa,
+				 bool_t gla_valid, unsigned long gla,
+				 bool_t access_r, bool_t access_w,
+				 bool_t access_x);

 #define hvm_msr_tsc_aux(v) ({                                               \
     struct domain *__d = (v)->domain;                                       \
diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/mem_access.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/mem_access.h	Tue Jan 04 11:59:48 2011 -0800
@@ -0,0 +1,35 @@
+/******************************************************************************
+ * include/asm-x86/mem_paging.h
+ *
+ * Memory access support.
+ *
+ * Copyright (c) 2009 Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
+                      XEN_GUEST_HANDLE(void) u_domctl);
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r a3cec4b94150 -r 06b0916eb91d tools/xenpaging/xenpaging.c
--- a/tools/xenpaging/xenpaging.c	Tue Jan 04 10:30:15 2011 -0800
+++ b/tools/xenpaging/xenpaging.c	Tue Jan 04 11:59:48 2011 -0800
@@ -658,7 +658,7 @@
             {
                 DPRINTF("page already populated (domain = %d; vcpu = %d;"
                         " p2mt = %x;"
-                        " gfn = %"PRIx64"; paused = %"PRId64")\n",
+                        " gfn = %"PRIx64"; paused = %d)\n",
                         paging->mem_event.domain_id, req.vcpu_id,
                         req.p2mt,
                         req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2 of 6] REDO: mem_access & mem_access 2: mem event additions for access
  2011-01-04 22:07 [PATCH 2 of 6] REDO: mem_access & mem_access 2: mem event additions for access Joe Epstein
@ 2011-01-05 14:15 ` Tim Deegan
  2011-01-05 14:47   ` Joe Epstein
  0 siblings, 1 reply; 4+ messages in thread
From: Tim Deegan @ 2011-01-05 14:15 UTC (permalink / raw)
  To: Joe Epstein; +Cc: xen-devel

Hi, 

Thanks for the patches.  As Keir points out, we'll need them in a form
that applies cleanly (maybe send as attachments next time).  It's also
useful when reviewing patches if they're in 'diff -p' format.  In
mercurial, you can arrange that by adding these lines to your ~/.hgrc
file:

 [diff]
 showfunc = True

Further comments inline below.  Nothing too hard to address, I think.

At 22:07 +0000 on 04 Jan (1294178833), Joe Epstein wrote:
> diff -r a3cec4b94150 -r 06b0916eb91d xen/include/public/domctl.h
> --- a/xen/include/public/domctl.h       Tue Jan 04 10:30:15 2011 -0800
> +++ b/xen/include/public/domctl.h       Tue Jan 04 11:59:48 2011 -0800
> @@ -714,7 +714,7 @@
>  /*
>   * Page memory in and out.
>   */
> -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING (1 << 0)
> +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING            1
> 
>  /* Domain memory paging */
>  #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE   0
> @@ -722,6 +722,12 @@
>  #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP       2
>  #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME     3
> 
> +/*
> + * Access permissions
> + */
> +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS            2
> +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_RESUME     0

These could do with a nice big block comment that describes what they
do. 

[...]

> diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/mem_event.h
> --- a/xen/include/asm-x86/mem_event.h   Tue Jan 04 10:30:15 2011 -0800
> +++ b/xen/include/asm-x86/mem_event.h   Tue Jan 04 11:59:48 2011 -0800
> @@ -24,6 +24,8 @@
>  #ifndef __MEM_EVENT_H__
>  #define __MEM_EVENT_H__
> 
> +/* Returns true if a listener exists, else pauses VCPU */
> +int mem_event_check_listener(struct domain *d);

That's a pretty odd thing for a function to do.  I see below that the
only caller already knows there's no listener and ignores the return
value.   I think you can just get rid of this function entirely.

[...] 

> +
> +    /*
> +     * If this GFN is emulated MMIO or marked as read-only (which old
> MMIO is),
> +     * pass the fault to the mmio handler first.
> +     */

Why did you change this comment?  Pages marked as p2m_ram_ro are not
"old MMIO", they're ROM images &c, and go through the emulator so the
write can be correctly discarded. 

> +    if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
>      {
> -        /*
> -         * Page log dirty is always done with order 0. If this mfn resides in
> -         * a large page, we do not change other pages type within that large
> -         * page.
> -         */
> -        paging_mark_dirty(v->domain, mfn_x(mfn));
> -        p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw);
> +        if ( !handle_mmio() )
> +        {
> +            guest_fault = 1;
> +            goto check_access_handler;
> +        }
>          return 1;
>      }
> 
> -    /* Shouldn't happen: Maybe the guest was writing to a r/o grant mapping? */
> -    if ( p2mt == p2m_grant_map_ro )
> +    /* Was it a write access: log-dirty, etc... */
> +    if ( access_w ) {
> +        /* PoD and log-dirty also take this path. */

Moving the log-dirty check inside the test for access_w reintroduces a
race condition in multi-vcpu systems (where the same log-dirty fault
happens on two CPUs and the first CPU has changed the type back to 
p2m_ram_rw by the time the second one looks up the type).  Please put
this case back unconditionally.

> +        if ( p2mt != p2m_ram_rw && p2m_is_ram(p2mt) )
> +        {
> +            /*
> +             * Page log dirty is always done with order 0. If this
> mfn resides in
> +             * a large page, we do not change other pages type within
> that large
> +             * page.
> +             */
> +            paging_mark_dirty(v->domain, mfn_x(mfn));
> +            p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw);
> +            return 1;
> +        }
> +
> +        /* Shouldn't happen: Maybe the guest was writing to a r/o
> grant mapping? */
> +        if ( p2mt == p2m_grant_map_ro )
> +        {
> +            gdprintk(XENLOG_WARNING,
> +                     "trying to write to read-only grant mapping\n");
> +            guest_fault = 1;
> +            goto check_access_handler;
> +        }
> +    } /* end access_w */
> +
> + check_access_handler:
> +    /* Even if it is the guest's "fault", check with the mem_event
> interface instead if
> +     * one is there */
> +    if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r,
> access_w, access_x) )
> +        return 1;

p2m_mem_access_check() appears to _always_ return 1.  Was that the
intention? 

> +    /* If there is no handler, then fault if guest_fault = 1 */
> +    if ( guest_fault )
>      {
> -        gdprintk(XENLOG_WARNING,
> -                 "trying to write to read-only grant mapping\n");
>          hvm_inject_exception(TRAP_gp_fault, 0, 0);
>          return 1;
>      }
> 
> +    /* Nothing handled it: it must be an access error with no memory
> handler, so fail */
>      return 0;
>  }
> 
> diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/hvm/svm/svm.c
> --- a/xen/arch/x86/hvm/svm/svm.c        Tue Jan 04 10:30:15 2011 -0800
> +++ b/xen/arch/x86/hvm/svm/svm.c        Tue Jan 04 11:59:48 2011 -0800
> @@ -979,7 +979,7 @@
>          __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
>      }
> 
> -    if ( hvm_hap_nested_page_fault(gfn) )
> +    if ( hvm_hap_nested_page_fault(gpa, 0, ~0ull, 0, 0, 0) )

Surely this totally breaks the AMD NPT case -- you need to get the
access-type arguments right or else not rely on them in
hvm_hap_nested_page_fault().

[...]

> diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/mem_access.c
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/xen/arch/x86/mm/mem_access.c      Tue Jan 04 11:59:48 2011 -0800
> @@ -0,0 +1,59 @@
> +/******************************************************************************
> + * arch/x86/mm/mem_access.c
> + *
> + * Memory access support.
> + *
> + * Copyright (c) 2009 Citrix Systems, Inc.

Eh, probably not. :)

[...]

> diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c     Tue Jan 04 10:30:15 2011 -0800
> +++ b/xen/arch/x86/mm/p2m.c     Tue Jan 04 11:59:48 2011 -0800
> @@ -2853,6 +2853,98 @@
>  }
>  #endif /* __x86_64__ */
> 
> +int p2m_mem_access_check(unsigned long gpa, bool_t gla_valid,
> unsigned long gla,
> +                         bool_t access_r, bool_t access_w, bool_t access_x)
> +{
> +    struct vcpu *v = current;
> +    mem_event_request_t req;
> +    unsigned long gfn = gpa >> PAGE_SHIFT;
> +    struct domain *d = v->domain;
> +    struct p2m_domain* p2m = p2m_get_hostp2m(d);
> +    int res;
> +    mfn_t mfn;
> +    p2m_type_t p2mt;
> +    p2m_access_t p2ma;
> +
> +    /* First, handle rx2rw conversion automatically */
> +    mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query);
> +
> +    if ( access_w && p2ma == p2m_access_rx2rw ) {
> +        p2m_lock(p2m);
> +        p2m->set_entry(p2m, gfn, mfn, 0, p2mt, p2m_access_rw);

Might be better to use p2m_change_type here; it's atomic so avoids
potential races. 

> +        p2m_unlock(p2m);
> +
> +        return 1;  /* handled */
> +    }
> +
> +    /* Otherwise, check if there is a memory event listener, and send
> the message along */
> +    res = mem_event_check_ring(d);
> +    if ( res < 0 )
> +    {
> +        /* No listener */
> +        if ( p2m->access_required )
> +        {
> +            printk(XENLOG_INFO
> +                   "Memory access permissions failure, no mem_event
> listener: pausing VCPU %d, dom %d\n",
> +                   v->vcpu_id, d->domain_id);
> +
> +            /* Will pause the VCPU */
> +            (void) mem_event_check_listener(d);

Why does this need a special case?  Could you just post the violation 
to the ring and pause as normal, and then if a listener ever arrives it
can handle it?

In fact, I don't see why access_required needs to be a separate flag at
all - it's implied by setting the access permissions on a page or
setting the default to anything other than rwx.  That would address
Keir's concern about adding a separate domcrf flag.

> +        }
> +        else
> +        {
> +            /* A listener is not required, so clear the access restrictions */
> +            p2m_lock(p2m);
> +            p2m->set_entry(p2m, gfn, mfn, 0, p2mt, p2m_access_rwx);
> +            p2m_unlock(p2m);
> +        }
> +
> +        return 1;
> +    }
> +    else if ( res > 0 )
> +        return 1;  /* No space in buffer */

DYM "return 0" here?  I think this function needs a comment describing
what the return value means. 

[...]
> diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/mem_access.h
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/xen/include/asm-x86/mem_access.h  Tue Jan 04 11:59:48 2011 -0800
> @@ -0,0 +1,35 @@
> +/******************************************************************************
> + * include/asm-x86/mem_paging.h
> + *
> + * Memory access support.
> + *
> + * Copyright (c) 2009 Citrix Systems, Inc.

Again, no. 

[...]

> diff -r a3cec4b94150 -r 06b0916eb91d tools/xenpaging/xenpaging.c
> --- a/tools/xenpaging/xenpaging.c       Tue Jan 04 10:30:15 2011 -0800
> +++ b/tools/xenpaging/xenpaging.c       Tue Jan 04 11:59:48 2011 -0800
> @@ -658,7 +658,7 @@
>              {
>                  DPRINTF("page already populated (domain = %d; vcpu = %d;"
>                          " p2mt = %x;"
> -                        " gfn = %"PRIx64"; paused = %"PRId64")\n",
> +                        " gfn = %"PRIx64"; paused = %d)\n",
>                          paging->mem_event.domain_id, req.vcpu_id,
>                          req.p2mt,
>                          req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED);
> 

this belongs in a separate patch; it's not part of the change described
at the top.

Cheers,

Tim.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2 of 6] REDO: mem_access & mem_access 2: mem event additions for access
  2011-01-05 14:15 ` Tim Deegan
@ 2011-01-05 14:47   ` Joe Epstein
  2011-01-05 15:01     ` Tim Deegan
  0 siblings, 1 reply; 4+ messages in thread
From: Joe Epstein @ 2011-01-05 14:47 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 12849 bytes --]

Hi Tim,

Thanks for the quick response.  I have some questions and comments, which I
put in line.  As you mentioned, I think this should be fairly easy to knock
out.

On Wed, Jan 5, 2011 at 6:15 AM, Tim Deegan <Tim.Deegan@citrix.com> wrote:

> Hi,
>
> Thanks for the patches.  As Keir points out, we'll need them in a form
> that applies cleanly (maybe send as attachments next time).  It's also
> useful when reviewing patches if they're in 'diff -p' format.  In
> mercurial, you can arrange that by adding these lines to your ~/.hgrc
> file:
>
>  [diff]
>  showfunc = True
>

Will do.

>
> Further comments inline below.  Nothing too hard to address, I think.
>
> At 22:07 +0000 on 04 Jan (1294178833), Joe Epstein wrote:
> > diff -r a3cec4b94150 -r 06b0916eb91d xen/include/public/domctl.h
> > --- a/xen/include/public/domctl.h       Tue Jan 04 10:30:15 2011 -0800
> > +++ b/xen/include/public/domctl.h       Tue Jan 04 11:59:48 2011 -0800
> > @@ -714,7 +714,7 @@
> >  /*
> >   * Page memory in and out.
> >   */
> > -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING (1 << 0)
> > +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING            1
> >
> >  /* Domain memory paging */
> >  #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE   0
> > @@ -722,6 +722,12 @@
> >  #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP       2
> >  #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME     3
> >
> > +/*
> > + * Access permissions
> > + */
> > +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS            2
> > +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_RESUME     0
>
> These could do with a nice big block comment that describes what they
> do.
>
Will do.


>
> [...]
>
> > diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/mem_event.h
> > --- a/xen/include/asm-x86/mem_event.h   Tue Jan 04 10:30:15 2011 -0800
> > +++ b/xen/include/asm-x86/mem_event.h   Tue Jan 04 11:59:48 2011 -0800
> > @@ -24,6 +24,8 @@
> >  #ifndef __MEM_EVENT_H__
> >  #define __MEM_EVENT_H__
> >
> > +/* Returns true if a listener exists, else pauses VCPU */
> > +int mem_event_check_listener(struct domain *d);
>
> That's a pretty odd thing for a function to do.  I see below that the
> only caller already knows there's no listener and ignores the return
> value.   I think you can just get rid of this function entirely.
>
>
True.  I had this as a separate function only in case others found it
useful in the future, and as a bit of a layer separation, where knowledge of
the _VPF_mem_event bit would happen only in mem_event.c.  If you don't mind
me breaking that, I can move it over.


> [...]
>
> > +
> > +    /*
> > +     * If this GFN is emulated MMIO or marked as read-only (which old
> > MMIO is),
> > +     * pass the fault to the mmio handler first.
> > +     */
>
> Why did you change this comment?  Pages marked as p2m_ram_ro are not
> "old MMIO", they're ROM images &c, and go through the emulator so the
> write can be correctly discarded.
>

No idea.  :)  I'll revert that comment.


>
> > +    if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
> >      {
> > -        /*
> > -         * Page log dirty is always done with order 0. If this mfn
> resides in
> > -         * a large page, we do not change other pages type within that
> large
> > -         * page.
> > -         */
> > -        paging_mark_dirty(v->domain, mfn_x(mfn));
> > -        p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw);
> > +        if ( !handle_mmio() )
> > +        {
> > +            guest_fault = 1;
> > +            goto check_access_handler;
> > +        }
> >          return 1;
> >      }
> >
> > -    /* Shouldn't happen: Maybe the guest was writing to a r/o grant
> mapping? */
> > -    if ( p2mt == p2m_grant_map_ro )
> > +    /* Was it a write access: log-dirty, etc... */
> > +    if ( access_w ) {
> > +        /* PoD and log-dirty also take this path. */
>
> Moving the log-dirty check inside the test for access_w reintroduces a
> race condition in multi-vcpu systems (where the same log-dirty fault
> happens on two CPUs and the first CPU has changed the type back to
> p2m_ram_rw by the time the second one looks up the type).  Please put
> this case back unconditionally.
>

Question: the conditional I'm replacing just simply says   if (
p2m_is_ram(p2mt) ).  I have to better qualify that to know that this was an
access to a page based on its type and not based on its access permissions.
Otherwise, we'll either take memory events on logdirty, etc., or we'll miss
real ones that got sent to change the page type instead.  What is the right
conditional then, besides access_w, that will direct the handling
appropriately?


>
> > +        if ( p2mt != p2m_ram_rw && p2m_is_ram(p2mt) )
> > +        {
> > +            /*
> > +             * Page log dirty is always done with order 0. If this
> > mfn resides in
> > +             * a large page, we do not change other pages type within
> > that large
> > +             * page.
> > +             */
> > +            paging_mark_dirty(v->domain, mfn_x(mfn));
> > +            p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw);
> > +            return 1;
> > +        }
> > +
> > +        /* Shouldn't happen: Maybe the guest was writing to a r/o
> > grant mapping? */
> > +        if ( p2mt == p2m_grant_map_ro )
> > +        {
> > +            gdprintk(XENLOG_WARNING,
> > +                     "trying to write to read-only grant mapping\n");
> > +            guest_fault = 1;
> > +            goto check_access_handler;
> > +        }
> > +    } /* end access_w */
> > +
> > + check_access_handler:
> > +    /* Even if it is the guest's "fault", check with the mem_event
> > interface instead if
> > +     * one is there */
> > +    if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r,
> > access_w, access_x) )
> > +        return 1;
>
> p2m_mem_access_check() appears to _always_ return 1.  Was that the
> intention?
>

Will change.  Originally, the thought was that if there were no memory
listener, then crash the guest.  But that was changed, and the interface
wasn't changed as well.


> > +    /* If there is no handler, then fault if guest_fault = 1 */
> > +    if ( guest_fault )
> >      {
> > -        gdprintk(XENLOG_WARNING,
> > -                 "trying to write to read-only grant mapping\n");
> >          hvm_inject_exception(TRAP_gp_fault, 0, 0);
> >          return 1;
> >      }
> >
> > +    /* Nothing handled it: it must be an access error with no memory
> > handler, so fail */
> >      return 0;
> >  }
> >
> > diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/hvm/svm/svm.c
> > --- a/xen/arch/x86/hvm/svm/svm.c        Tue Jan 04 10:30:15 2011 -0800
> > +++ b/xen/arch/x86/hvm/svm/svm.c        Tue Jan 04 11:59:48 2011 -0800
> > @@ -979,7 +979,7 @@
> >          __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
> >      }
> >
> > -    if ( hvm_hap_nested_page_fault(gfn) )
> > +    if ( hvm_hap_nested_page_fault(gpa, 0, ~0ull, 0, 0, 0) )
>
> Surely this totally breaks the AMD NPT case -- you need to get the
> access-type arguments right or else not rely on them in
> hvm_hap_nested_page_fault().
>

Good point! :)  That will get fixed.

>
> [...]
>
> > diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/mem_access.c
> > --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> > +++ b/xen/arch/x86/mm/mem_access.c      Tue Jan 04 11:59:48 2011 -0800
> > @@ -0,0 +1,59 @@
> >
> +/******************************************************************************
> > + * arch/x86/mm/mem_access.c
> > + *
> > + * Memory access support.
> > + *
> > + * Copyright (c) 2009 Citrix Systems, Inc.
>
> Eh, probably not. :)
>
> [...]
>

Hmm.  It's a direct copy of mem_paging.c, which is copyright Citrix.  Since
I changed only two lines out of it, it seemed rather inappropriate to claim
copyright on derivative work.  But, if you'd like, I can make that change to
claim it for ourselves.


> > diff -r a3cec4b94150 -r 06b0916eb91d xen/arch/x86/mm/p2m.c
> > --- a/xen/arch/x86/mm/p2m.c     Tue Jan 04 10:30:15 2011 -0800
> > +++ b/xen/arch/x86/mm/p2m.c     Tue Jan 04 11:59:48 2011 -0800
> > @@ -2853,6 +2853,98 @@
> >  }
> >  #endif /* __x86_64__ */
> >
> > +int p2m_mem_access_check(unsigned long gpa, bool_t gla_valid,
> > unsigned long gla,
> > +                         bool_t access_r, bool_t access_w, bool_t
> access_x)
> > +{
> > +    struct vcpu *v = current;
> > +    mem_event_request_t req;
> > +    unsigned long gfn = gpa >> PAGE_SHIFT;
> > +    struct domain *d = v->domain;
> > +    struct p2m_domain* p2m = p2m_get_hostp2m(d);
> > +    int res;
> > +    mfn_t mfn;
> > +    p2m_type_t p2mt;
> > +    p2m_access_t p2ma;
> > +
> > +    /* First, handle rx2rw conversion automatically */
> > +    mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query);
> > +
> > +    if ( access_w && p2ma == p2m_access_rx2rw ) {
> > +        p2m_lock(p2m);
> > +        p2m->set_entry(p2m, gfn, mfn, 0, p2mt, p2m_access_rw);
>
> Might be better to use p2m_change_type here; it's atomic so avoids
> potential races.
>

Question: p2m_change_type doesn't have the access permissions: only
set_entry does, so I can't use it.  Should I rather just move p2m_lock above
the get_entry?

>
> > +        p2m_unlock(p2m);
> > +
> > +        return 1;  /* handled */
> > +    }
> > +
> > +    /* Otherwise, check if there is a memory event listener, and send
> > the message along */
> > +    res = mem_event_check_ring(d);
> > +    if ( res < 0 )
> > +    {
> > +        /* No listener */
> > +        if ( p2m->access_required )
> > +        {
> > +            printk(XENLOG_INFO
> > +                   "Memory access permissions failure, no mem_event
> > listener: pausing VCPU %d, dom %d\n",
> > +                   v->vcpu_id, d->domain_id);
> > +
> > +            /* Will pause the VCPU */
> > +            (void) mem_event_check_listener(d);
>
> Why does this need a special case?  Could you just post the violation
> to the ring and pause as normal, and then if a listener ever arrives it
> can handle it?
>

> In fact, I don't see why access_required needs to be a separate flag at
> all - it's implied by setting the access permissions on a page or
> setting the default to anything other than rwx.  That would address
> Keir's concern about adding a separate domcrf flag.
>

The idea is to have two modes: fail-closed, and fail-open.  If
access_required is true, then the VCPU must pause.  But if it is false, I
want to revert the access flags and let the VCPU chug along.  That's to make
sure that life works if the memory event handler crashes, of course, and
explains why the page type is set to rwx and not default_access in that
case.

So I'll add a DOMCTL.


> > +        }
> > +        else
> > +        {
> > +            /* A listener is not required, so clear the access
> restrictions */
> > +            p2m_lock(p2m);
> > +            p2m->set_entry(p2m, gfn, mfn, 0, p2mt, p2m_access_rwx);
> > +            p2m_unlock(p2m);
> > +        }
> > +
> > +        return 1;
> > +    }
> > +    else if ( res > 0 )
> > +        return 1;  /* No space in buffer */
>
> DYM "return 0" here?  I think this function needs a comment describing
> what the return value means.
>

Hmm... it should be return 1, because no space in buffer is satisfied (no
error condition) by pausing the VCPU.  I'm just going to remove the return
code entirely.

>
> [...]
> > diff -r a3cec4b94150 -r 06b0916eb91d xen/include/asm-x86/mem_access.h
> > --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> > +++ b/xen/include/asm-x86/mem_access.h  Tue Jan 04 11:59:48 2011 -0800
> > @@ -0,0 +1,35 @@
> >
> +/******************************************************************************
> > + * include/asm-x86/mem_paging.h
> > + *
> > + * Memory access support.
> > + *
> > + * Copyright (c) 2009 Citrix Systems, Inc.
>
> Again, no.
>
> [...]
>
> > diff -r a3cec4b94150 -r 06b0916eb91d tools/xenpaging/xenpaging.c
> > --- a/tools/xenpaging/xenpaging.c       Tue Jan 04 10:30:15 2011 -0800
> > +++ b/tools/xenpaging/xenpaging.c       Tue Jan 04 11:59:48 2011 -0800
> > @@ -658,7 +658,7 @@
> >              {
> >                  DPRINTF("page already populated (domain = %d; vcpu =
> %d;"
> >                          " p2mt = %x;"
> > -                        " gfn = %"PRIx64"; paused = %"PRId64")\n",
> > +                        " gfn = %"PRIx64"; paused = %d)\n",
> >                          paging->mem_event.domain_id, req.vcpu_id,
> >                          req.p2mt,
> >                          req.gfn, req.flags &
> MEM_EVENT_FLAG_VCPU_PAUSED);
> >
>
> this belongs in a separate patch; it's not part of the change described
> at the top.
>
Are you sure?  I changed the flag type from u64 to u16, so that breaks
this DPRINTF.


> Cheers,
>
> Tim.
>
> --
> Tim Deegan <Tim.Deegan@citrix.com>
> Principal Software Engineer, Xen Platform Team
> Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)
>

Thanks!

[-- Attachment #1.2: Type: text/html, Size: 18781 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2 of 6] REDO: mem_access & mem_access 2: mem event additions for access
  2011-01-05 14:47   ` Joe Epstein
@ 2011-01-05 15:01     ` Tim Deegan
  0 siblings, 0 replies; 4+ messages in thread
From: Tim Deegan @ 2011-01-05 15:01 UTC (permalink / raw)
  To: Joe Epstein; +Cc: xen-devel

At 14:47 +0000 on 05 Jan (1294238857), Joe Epstein wrote:
> True.  I had this as a separate function only in case others found it
> useful in the future, and as a bit of a layer separation, where
> knowledge of the _VPF_mem_event bit would happen only in mem_event.c.
> If you don't mind me breaking that, I can move it over.

If you'd like to keep that separation, you could make a function that
does just the mark-and-pause, but I'd be happy enough with it open-coded
at the caller. 

>> Moving the log-dirty check inside the test for access_w reintroduces a
>> race condition in multi-vcpu systems (where the same log-dirty fault
>> happens on two CPUs and the first CPU has changed the type back to
>> p2m_ram_rw by the time the second one looks up the type).  Please put
>> this case back unconditionally.
> 
> Question: the conditional I'm replacing just simply says if (
> p2m_is_ram(p2mt) ).  I have to better qualify that to know that this
> was an access to a page based on its type and not based on its access
> permissions.  Otherwise, we'll either take memory events on logdirty,
> etc., or we'll miss real ones that got sent to change the page type
> instead.  What is the right conditional then, besides access_w, that
> will direct the handling appropriately?

I think you should always send mem-events when the access bits don't
match the access type.  Then if you didn't send a mem_event, do the
log-dirty/spurious-fault branch with the same condition as before.
Will that do the right thing for the cases you care about?

>>> + * Copyright (c) 2009 Citrix Systems, Inc.
>> 
>> Eh, probably not. :)
> 
> Hmm.  It's a direct copy of mem_paging.c, which is copyright Citrix.
> Since I changed only two lines out of it, it seemed rather
> inappropriate to claim copyright on derivative work.  But, if you'd
> like, I can make that change to claim it for ourselves.

You could leave that copyright line and add your own as well?

>> Might be better to use p2m_change_type here; it's atomic so avoids
>> potential races.
> 
> Question: p2m_change_type doesn't have the access permissions: only
> set_entry does, so I can't use it.  Should I rather just move p2m_lock
> above the get_entry?

Yes, that would be fine. 

>> Why does this need a special case?  Could you just post the violation
>> to the ring and pause as normal, and then if a listener ever arrives it
>> can handle it?
>> 
>> In fact, I don't see why access_required needs to be a separate flag at
>> all - it's implied by setting the access permissions on a page or
>> setting the default to anything other than rwx.  That would address
>> Keir's concern about adding a separate domcrf flag.
> 
> The idea is to have two modes: fail-closed, and fail-open.  If
> access_required is true, then the VCPU must pause.  But if it is
> false, I want to revert the access flags and let the VCPU chug along.
> That's to make sure that life works if the memory event handler
> crashes, of course, and explains why the page type is set to rwx and
> not default_access in that case.

Oh I see.  

> So I'll add a DOMCTL.

Righto. 

>> this belongs in a separate patch; it's not part of the change described
>> at the top.
> 
> Are you sure?  I changed the flag type from u64 to u16, so that breaks this DPRINTF.

Oh, right.  Sorry, I missed that.  Yes, leave this in this patch.

Tim.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-01-05 15:01 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-04 22:07 [PATCH 2 of 6] REDO: mem_access & mem_access 2: mem event additions for access Joe Epstein
2011-01-05 14:15 ` Tim Deegan
2011-01-05 14:47   ` Joe Epstein
2011-01-05 15:01     ` Tim Deegan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.