* [PATCH RFC V9 1/5] xen: Emulate with no writes
@ 2014-08-28 11:47 Razvan Cojocaru
  2014-08-28 11:47 ` [PATCH RFC V9 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
                   ` (3 more replies)
  0 siblings, 4 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 11:47 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	jun.nakajima, andrew.cooper3, eddie.dong, tim, JBeulich,
	ian.jackson

Added support for emulating an instruction with no memory writes.
Additionally, introduced hvm_mem_event_emulate_one() (named
hvm_emulate_one_full() in earlier versions of this series), which
inspects the possible return values of the hvm_emulate_one()
functions (EXCEPTION, UNHANDLEABLE) and acts on them.
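
For illustration, patch 5 of this series ends up invoking the new
helper roughly as below (a sketch only; the rsp reply variable and the
MEM_EVENT_FLAG_* definitions come with the later patches):

    /* Emulate the interrupted instruction, optionally discarding its
     * writes; inject #UD if the emulator cannot handle it. */
    if ( rsp.flags & MEM_EVENT_FLAG_EMULATE )
        hvm_mem_event_emulate_one(
            (rsp.flags & MEM_EVENT_FLAG_EMULATE_NOWRITE) != 0,
            TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);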

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Acked-by: Jan Beulich <jbeulich@suse.com>

---
Changes since V7:
 - Renamed hvm_emulate_one_full() to hvm_mem_event_emulate_one().
 - Added a comment explaining how X86EMUL_RETRY is being treated.

Changes since V6:
 - Added discard versions of write_msr and wbinvd.
 - Now calling hvm_emulate_writeback() for the X86EMUL_UNHANDLEABLE
   case as well.

Changes since V5:
 - Added discard versions of write_io and rep_outs.

Changes since V4:
 - Also discarding IO reads (dummy read_io handler).

Changes since V3:
 - The rep_ins, rep_movs and cmpxchg handlers are
   now also inactive.

Changes since V2:
 - Renamed hvmemul_write_dummy() to hvmemul_write_discard().
 - Fixed a comment (Xen coding style rules).
 - Renamed hvm_emulate_one_with_ops() to _hvm_emulate_one().
 - Changed stack variable hvm_emulate_ops_no_write into a static const.
 - Modified struct hvm_emulate_ctxt ctx initialization syntax.
 - Renamed unhandleable_trapnr to trapnr and unhandleable_errcode
   to errcode.
 - Changed errcode's type to unsigned int.
 - Removed hvm_emulate_one() loop that went on until it returned
   something other than X86EMUL_RETRY (to prevent potential
   blocking against the time calibration rendezvous).

Changes since V1:
 - Removed the Linux code that computes the length of an instruction.
 - Unused function parameters are no longer marked.
 - Refactored the code to eliminate redundancy.
 - Made the exception passed on to the guest by hvm_emulate_one_full()
   configurable.
---
 xen/arch/x86/hvm/emulate.c        |  175 ++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/emulate.h |    5 ++
 2 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index eac159f..81ba3e4 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -688,6 +688,94 @@ static int hvmemul_write(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_write_discard(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    /* Discarding the write. */
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_ins_discard(
+    uint16_t src_port,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_movs_discard(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_outs_discard(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    uint16_t dst_port,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_cmpxchg_discard(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_old,
+    void *p_new,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_io_discard(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_io_discard(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_msr_discard(
+    unsigned long reg,
+    uint64_t val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_wbinvd_discard(
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -1138,8 +1226,33 @@ static const struct x86_emulate_ops hvm_emulate_ops = {
     .invlpg        = hvmemul_invlpg
 };
 
-int hvm_emulate_one(
-    struct hvm_emulate_ctxt *hvmemul_ctxt)
+static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
+    .read          = hvmemul_read,
+    .insn_fetch    = hvmemul_insn_fetch,
+    .write         = hvmemul_write_discard,
+    .cmpxchg       = hvmemul_cmpxchg_discard,
+    .rep_ins       = hvmemul_rep_ins_discard,
+    .rep_outs      = hvmemul_rep_outs_discard,
+    .rep_movs      = hvmemul_rep_movs_discard,
+    .read_segment  = hvmemul_read_segment,
+    .write_segment = hvmemul_write_segment,
+    .read_io       = hvmemul_read_io_discard,
+    .write_io      = hvmemul_write_io_discard,
+    .read_cr       = hvmemul_read_cr,
+    .write_cr      = hvmemul_write_cr,
+    .read_msr      = hvmemul_read_msr,
+    .write_msr     = hvmemul_write_msr_discard,
+    .wbinvd        = hvmemul_wbinvd_discard,
+    .cpuid         = hvmemul_cpuid,
+    .inject_hw_exception = hvmemul_inject_hw_exception,
+    .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
+    .get_fpu       = hvmemul_get_fpu,
+    .put_fpu       = hvmemul_put_fpu,
+    .invlpg        = hvmemul_invlpg
+};
+
+static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const struct x86_emulate_ops *ops)
 {
     struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
     struct vcpu *curr = current;
@@ -1191,7 +1304,7 @@ int hvm_emulate_one(
     vio->mmio_retrying = vio->mmio_retry;
     vio->mmio_retry = 0;
 
-    rc = x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
+    rc = x86_emulate(&hvmemul_ctxt->ctxt, ops);
 
     if ( rc == X86EMUL_OKAY && vio->mmio_retry )
         rc = X86EMUL_RETRY;
@@ -1239,6 +1352,62 @@ int hvm_emulate_one(
     return X86EMUL_OKAY;
 }
 
+int hvm_emulate_one(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops);
+}
+
+int hvm_emulate_one_no_write(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
+}
+
+void hvm_mem_event_emulate_one(bool_t nowrite, unsigned int trapnr,
+    unsigned int errcode)
+{
+    struct hvm_emulate_ctxt ctx = {{ 0 }};
+    int rc;
+
+    hvm_emulate_prepare(&ctx, guest_cpu_user_regs());
+
+    if ( nowrite )
+        rc = hvm_emulate_one_no_write(&ctx);
+    else
+        rc = hvm_emulate_one(&ctx);
+
+    switch ( rc )
+    {
+    case X86EMUL_RETRY:
+        /*
+         * This function is called when handling an EPT-related mem_event
+         * reply. As such, nothing else needs to be done here, since simply
+         * returning makes the current instruction cause a page fault again,
+         * consistent with X86EMUL_RETRY.
+         */
+        return;
+    case X86EMUL_UNHANDLEABLE:
+        gdprintk(XENLOG_DEBUG, "Emulation failed @ %04x:%lx: "
+               "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
+               hvmemul_get_seg_reg(x86_seg_cs, &ctx)->sel,
+               ctx.insn_buf_eip,
+               ctx.insn_buf[0], ctx.insn_buf[1],
+               ctx.insn_buf[2], ctx.insn_buf[3],
+               ctx.insn_buf[4], ctx.insn_buf[5],
+               ctx.insn_buf[6], ctx.insn_buf[7],
+               ctx.insn_buf[8], ctx.insn_buf[9]);
+        hvm_inject_hw_exception(trapnr, errcode);
+        break;
+    case X86EMUL_EXCEPTION:
+        if ( ctx.exn_pending )
+            hvm_inject_hw_exception(ctx.exn_vector, ctx.exn_error_code);
+        break;
+    }
+
+    hvm_emulate_writeback(&ctx);
+}
+
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs)
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 00a06cc..efff97e 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -37,6 +37,11 @@ struct hvm_emulate_ctxt {
 
 int hvm_emulate_one(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
+int hvm_emulate_one_no_write(
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+void hvm_mem_event_emulate_one(bool_t nowrite,
+    unsigned int trapnr,
+    unsigned int errcode);
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs);
-- 
1.7.9.5

* [PATCH RFC V9 2/5] xen: Optimize introspection access to guest state
  2014-08-28 11:47 [PATCH RFC V9 1/5] xen: Emulate with no writes Razvan Cojocaru
@ 2014-08-28 11:47 ` Razvan Cojocaru
  2014-08-28 11:48 ` [PATCH RFC V9 3/5] xen, libxc: Force-enable relevant MSR events Razvan Cojocaru
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 11:47 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	jun.nakajima, andrew.cooper3, eddie.dong, tim, JBeulich,
	ian.jackson

Speed optimization for introspection purposes: a handful of registers
are sent along with each mem_event. This requires enlarging the
mem_event_request / mem_event_response structures, plus additional code
to fill in the relevant values. Since the EPT event processing code
needs more data than the CR3 or MSR event processors do,
hvm_mem_event_fill_regs() fills in less data than
p2m_mem_event_fill_regs(), in order to avoid overhead. Using struct
hvm_hw_cpu instead of the custom struct mem_event_regs_st was
considered, but its size would fill up the mem_event ring buffer too
quickly.
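
For illustration, a monitoring application on the other end of the ring
could consume the new register block roughly as follows (a sketch only;
mapping the ring and dequeuing the request are assumed):

    mem_event_request_t req;

    /* ... dequeue req from the shared mem_event ring ... */
    printf("vcpu %u: rip=%016" PRIx64 " cr3=%016" PRIx64 "\n",
           req.vcpu_id, req.x86_regs.rip, req.x86_regs.cr3);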

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Acked-by: Jan Beulich <jbeulich@suse.com>

---
Changes since V5:
 - Const-correctness.

Changes since V4:
 - Renamed "x86_mem_event_regs" to "mem_event_regs_x86" and
   removed a typedef.

Changes since V3:
 - Changed the name of VCPU variables referring to the current
   VCPU to "curr".
 - Renamed "mem_event_regs" to "x86_mem_event_regs" to make it
   explicit.

Changes since V2:
 - Removed inline function compiler suggestions.
 - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
 - Padded struct mem_event_regs_st.
 - Renamed mem_event_regs_t to mem_event_regs.
 - Readability changes.

Changes since V1:
 - Replaced guest_x86_mode with cs_arbytes in the mem_event_regs_st
   structure.
 - Removed superfluous preprocessor check for __x86_64__ in
   p2m_mem_event_fill_regs().
---
 xen/arch/x86/hvm/hvm.c         |   33 +++++++++++++++++++++++
 xen/arch/x86/mm/p2m.c          |   57 ++++++++++++++++++++++++++++++++++++++++
 xen/include/public/mem_event.h |   39 +++++++++++++++++++++++++++
 3 files changed, 129 insertions(+)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 94b18ba..05df1e5 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -6148,6 +6148,38 @@ int hvm_debug_op(struct vcpu *v, int32_t op)
     return rc;
 }
 
+static void hvm_mem_event_fill_regs(mem_event_request_t *req)
+{
+    const struct cpu_user_regs *regs = guest_cpu_user_regs();
+    const struct vcpu *curr = current;
+
+    req->x86_regs.rax = regs->eax;
+    req->x86_regs.rcx = regs->ecx;
+    req->x86_regs.rdx = regs->edx;
+    req->x86_regs.rbx = regs->ebx;
+    req->x86_regs.rsp = regs->esp;
+    req->x86_regs.rbp = regs->ebp;
+    req->x86_regs.rsi = regs->esi;
+    req->x86_regs.rdi = regs->edi;
+
+    req->x86_regs.r8  = regs->r8;
+    req->x86_regs.r9  = regs->r9;
+    req->x86_regs.r10 = regs->r10;
+    req->x86_regs.r11 = regs->r11;
+    req->x86_regs.r12 = regs->r12;
+    req->x86_regs.r13 = regs->r13;
+    req->x86_regs.r14 = regs->r14;
+    req->x86_regs.r15 = regs->r15;
+
+    req->x86_regs.rflags = regs->eflags;
+    req->x86_regs.rip    = regs->eip;
+
+    req->x86_regs.msr_efer = curr->arch.hvm_vcpu.guest_efer;
+    req->x86_regs.cr0 = curr->arch.hvm_vcpu.guest_cr[0];
+    req->x86_regs.cr3 = curr->arch.hvm_vcpu.guest_cr[3];
+    req->x86_regs.cr4 = curr->arch.hvm_vcpu.guest_cr[4];
+}
+
 static int hvm_memory_event_traps(long p, uint32_t reason,
                                   unsigned long value, unsigned long old, 
                                   bool_t gla_valid, unsigned long gla) 
@@ -6192,6 +6224,7 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
         req.gla = old;
     }
     
+    hvm_mem_event_fill_regs(&req);
     mem_event_put_request(d, &d->mem_event->access, &req);
     
     return 1;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index bca9f0f..41a316a 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1323,6 +1323,61 @@ void p2m_mem_paging_resume(struct domain *d)
     }
 }
 
+static void p2m_mem_event_fill_regs(mem_event_request_t *req)
+{
+    const struct cpu_user_regs *regs = guest_cpu_user_regs();
+    struct segment_register seg;
+    struct hvm_hw_cpu ctxt;
+    struct vcpu *curr = current;
+
+    /* Architecture-specific vmcs/vmcb bits */
+    hvm_funcs.save_cpu_ctxt(curr, &ctxt);
+
+    req->x86_regs.rax = regs->eax;
+    req->x86_regs.rcx = regs->ecx;
+    req->x86_regs.rdx = regs->edx;
+    req->x86_regs.rbx = regs->ebx;
+    req->x86_regs.rsp = regs->esp;
+    req->x86_regs.rbp = regs->ebp;
+    req->x86_regs.rsi = regs->esi;
+    req->x86_regs.rdi = regs->edi;
+
+    req->x86_regs.r8  = regs->r8;
+    req->x86_regs.r9  = regs->r9;
+    req->x86_regs.r10 = regs->r10;
+    req->x86_regs.r11 = regs->r11;
+    req->x86_regs.r12 = regs->r12;
+    req->x86_regs.r13 = regs->r13;
+    req->x86_regs.r14 = regs->r14;
+    req->x86_regs.r15 = regs->r15;
+
+    req->x86_regs.rflags = regs->eflags;
+    req->x86_regs.rip    = regs->eip;
+
+    req->x86_regs.dr7 = curr->arch.debugreg[7];
+    req->x86_regs.cr0 = ctxt.cr0;
+    req->x86_regs.cr2 = ctxt.cr2;
+    req->x86_regs.cr3 = ctxt.cr3;
+    req->x86_regs.cr4 = ctxt.cr4;
+
+    req->x86_regs.sysenter_cs = ctxt.sysenter_cs;
+    req->x86_regs.sysenter_esp = ctxt.sysenter_esp;
+    req->x86_regs.sysenter_eip = ctxt.sysenter_eip;
+
+    req->x86_regs.msr_efer = ctxt.msr_efer;
+    req->x86_regs.msr_star = ctxt.msr_star;
+    req->x86_regs.msr_lstar = ctxt.msr_lstar;
+
+    hvm_get_segment_register(curr, x86_seg_fs, &seg);
+    req->x86_regs.fs_base = seg.base;
+
+    hvm_get_segment_register(curr, x86_seg_gs, &seg);
+    req->x86_regs.gs_base = seg.base;
+
+    hvm_get_segment_register(curr, x86_seg_cs, &seg);
+    req->x86_regs.cs_arbytes = seg.attr.bytes;
+}
+
 bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, 
                           bool_t access_r, bool_t access_w, bool_t access_x,
                           mem_event_request_t **req_ptr)
@@ -1410,6 +1465,8 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
         req->access_x = access_x;
     
         req->vcpu_id = v->vcpu_id;
+
+        p2m_mem_event_fill_regs(req);
     }
 
     /* Pause the current VCPU */
diff --git a/xen/include/public/mem_event.h b/xen/include/public/mem_event.h
index 3831b41..bbd1636 100644
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -48,6 +48,44 @@
 #define MEM_EVENT_REASON_MSR         7    /* MSR was hit: gfn is MSR value, gla is MSR address;
                                              does NOT honour HVMPME_onchangeonly */
 
+/* Using a custom struct (not hvm_hw_cpu) so as to not fill
+ * the mem_event ring buffer too quickly. */
+struct mem_event_regs_x86 {
+    uint64_t rax;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbx;
+    uint64_t rsp;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+    uint64_t rflags;
+    uint64_t dr7;
+    uint64_t rip;
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    uint64_t msr_efer;
+    uint64_t msr_star;
+    uint64_t msr_lstar;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint32_t cs_arbytes;
+    uint32_t _pad;
+};
+
 typedef struct mem_event_st {
     uint32_t flags;
     uint32_t vcpu_id;
@@ -65,6 +103,7 @@ typedef struct mem_event_st {
     uint16_t available:12;
 
     uint16_t reason;
+    struct mem_event_regs_x86 x86_regs;
 } mem_event_request_t, mem_event_response_t;
 
 DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t);
-- 
1.7.9.5

* [PATCH RFC V9 3/5] xen, libxc: Force-enable relevant MSR events
  2014-08-28 11:47 [PATCH RFC V9 1/5] xen: Emulate with no writes Razvan Cojocaru
  2014-08-28 11:47 ` [PATCH RFC V9 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
@ 2014-08-28 11:48 ` Razvan Cojocaru
  2014-08-28 11:48 ` [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
  2014-08-28 11:48 ` [PATCH RFC V9 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
  3 siblings, 0 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 11:48 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	jun.nakajima, andrew.cooper3, eddie.dong, tim, JBeulich,
	ian.jackson

vmx_disable_intercept_for_msr() will now refuse to disable interception
of MSRs needed for memory introspection. It is not possible to gate this
on mem_access being active for the domain, since by the time mem_access
becomes active the interception for the interesting MSRs has already
been disabled (vmx_disable_intercept_for_msr() runs very early on).
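
For illustration, a tool opts into this behaviour via the new libxc
entry point added below (a sketch only; error handling abbreviated):

    uint32_t port;
    void *ring_page;

    /* Same parameters and semantics as xc_mem_access_enable(), but also
     * sets introspection_enabled and keeps the relevant MSR write
     * interception in place. */
    ring_page = xc_mem_access_enable_introspection(xch, domid, &port);
    if ( ring_page == NULL )
        perror("xc_mem_access_enable_introspection");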

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Acked-by: Jan Beulich <jbeulich@suse.com>

---
Changes since V8:
 - Renamed vmx_msrs_exit_array to vmx_introspection_force_enabled_msrs.

Changes since V7:
 - Reversed if conditions (cheapest one first).
 - Combined two if statements.
 - Moved "bool_t introspection_enabled;" to the bool_t group above.
 - Renamed msrs_exit_array to vmx_msrs_exit_array and made it
   "extern" in the header.

Changes since V6:
 - Moved the array of interesting MSRs to common header.
 - Minor code cleanup.

Changes since V5:
 - Added xc_mem_access_enable_introspection() to libxc, which has
   the same parameters and semantics as xc_mem_access_enable(),
   but it additionally sets d->arch.hvm_domain.introspection_enabled
   and enables relevant MSR interception.
 - Renamed vmx_enable_intro_msr_interception() to
   vmx_enable_msr_exit_interception().
 - Simplified vmx_enable_msr_exit_interception() (const array of MSRs).

Changes since V3:
 - Removed log line stating that MSR interception cannot be disabled.
 - Removed superfluous #include <asm/hvm/vmx/vmcs.h>.
 - Moved VMX-specific code to vmx.c (as a new hvm_funcs member).

Changes since V2:
 - Split a log line differently to keep it grepable.
 - Interception for relevant MSRs will be disabled only if
   mem_access is not enabled.
 - Since they end up being disabled early on (when mem_access
   is not enabled yet), re-enable interception when mem_access
   becomes active.

Changes since V1:
 - Replaced printk() with gdprintk(XENLOG_DEBUG, ...).
---
 tools/libxc/xc_mem_access.c        |   10 +++++++++-
 tools/libxc/xc_mem_event.c         |    7 +++++--
 tools/libxc/xc_private.h           |    2 +-
 tools/libxc/xenctrl.h              |    2 ++
 xen/arch/x86/hvm/vmx/vmcs.c        |   25 +++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c         |   13 +++++++++++++
 xen/arch/x86/mm/mem_event.c        |   11 +++++++++++
 xen/include/asm-x86/hvm/domain.h   |    1 +
 xen/include/asm-x86/hvm/hvm.h      |    2 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h |    7 +++++++
 xen/include/public/domctl.h        |    7 ++++---
 11 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/tools/libxc/xc_mem_access.c b/tools/libxc/xc_mem_access.c
index 461f0e9..55d0e9f 100644
--- a/tools/libxc/xc_mem_access.c
+++ b/tools/libxc/xc_mem_access.c
@@ -26,7 +26,15 @@
 
 void *xc_mem_access_enable(xc_interface *xch, domid_t domain_id, uint32_t *port)
 {
-    return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN, port);
+    return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN,
+                               port, 0);
+}
+
+void *xc_mem_access_enable_introspection(xc_interface *xch, domid_t domain_id,
+                                         uint32_t *port)
+{
+    return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN,
+                               port, 1);
 }
 
 int xc_mem_access_disable(xc_interface *xch, domid_t domain_id)
diff --git a/tools/libxc/xc_mem_event.c b/tools/libxc/xc_mem_event.c
index faf1cc6..8c0be4e 100644
--- a/tools/libxc/xc_mem_event.c
+++ b/tools/libxc/xc_mem_event.c
@@ -57,7 +57,7 @@ int xc_mem_event_memop(xc_interface *xch, domid_t domain_id,
 }
 
 void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
-                          uint32_t *port)
+                          uint32_t *port, int enable_introspection)
 {
     void *ring_page = NULL;
     uint64_t pfn;
@@ -120,7 +120,10 @@ void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
         break;
 
     case HVM_PARAM_ACCESS_RING_PFN:
-        op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE;
+        if ( enable_introspection )
+            op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION;
+        else
+            op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE;
         mode = XEN_DOMCTL_MEM_EVENT_OP_ACCESS;
         break;
 
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index c50a7c9..94df688 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -381,6 +381,6 @@ int xc_mem_event_memop(xc_interface *xch, domid_t domain_id,
  * param can be HVM_PARAM_PAGING/ACCESS/SHARING_RING_PFN
  */
 void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
-                          uint32_t *port);
+                          uint32_t *port, int enable_introspection);
 
 #endif /* __XC_PRIVATE_H__ */
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 1c5d0db..28b5562 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -2263,6 +2263,8 @@ int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
  * Caller has to unmap this page when done.
  */
 void *xc_mem_access_enable(xc_interface *xch, domid_t domain_id, uint32_t *port);
+void *xc_mem_access_enable_introspection(xc_interface *xch, domid_t domain_id,
+                                         uint32_t *port);
 int xc_mem_access_disable(xc_interface *xch, domid_t domain_id);
 int xc_mem_access_resume(xc_interface *xch, domid_t domain_id);
 
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 4a4f4e1..fc1f882 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -39,6 +39,7 @@
 #include <xen/keyhandler.h>
 #include <asm/shadow.h>
 #include <asm/tboot.h>
+#include <asm/mem_event.h>
 
 static bool_t __read_mostly opt_vpid_enabled = 1;
 boolean_param("vpid", opt_vpid_enabled);
@@ -71,6 +72,18 @@ u32 vmx_vmexit_control __read_mostly;
 u32 vmx_vmentry_control __read_mostly;
 u64 vmx_ept_vpid_cap __read_mostly;
 
+const u32 vmx_introspection_force_enabled_msrs[] = {
+    MSR_IA32_SYSENTER_EIP,
+    MSR_IA32_SYSENTER_ESP,
+    MSR_IA32_SYSENTER_CS,
+    MSR_IA32_MC0_CTL,
+    MSR_STAR,
+    MSR_LSTAR
+};
+
+const unsigned int vmx_introspection_force_enabled_msrs_size =
+    ARRAY_SIZE(vmx_introspection_force_enabled_msrs);
+
 static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, vmxon_region);
 static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
@@ -695,11 +708,23 @@ static void vmx_set_host_env(struct vcpu *v)
 void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
 {
     unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
+    struct domain *d = v->domain;
 
     /* VMX MSR bitmap supported? */
     if ( msr_bitmap == NULL )
         return;
 
+    if ( unlikely(d->arch.hvm_domain.introspection_enabled) &&
+         mem_event_check_ring(&d->mem_event->access) )
+    {
+        unsigned int i;
+
+        /* Filter out MSR-s needed for memory introspection */
+        for ( i = 0; i < vmx_introspection_force_enabled_msrs_size; i++ )
+            if ( msr == vmx_introspection_force_enabled_msrs[i] )
+                return;
+    }
+
     /*
      * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
      * have the write-low and read-high bitmap offsets the wrong way round.
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index dc18a72..b80f017 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1682,6 +1682,18 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
         *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
 }
 
+static void vmx_enable_msr_exit_interception(struct domain *d)
+{
+    struct vcpu *v;
+    unsigned int i;
+
+    /* Enable interception for MSRs needed for memory introspection. */
+    for_each_vcpu ( d, v )
+        for ( i = 0; i < vmx_introspection_force_enabled_msrs_size; i++ )
+            vmx_enable_intercept_for_msr(v, vmx_introspection_force_enabled_msrs[i],
+                                         MSR_TYPE_W);
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -1740,6 +1752,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .handle_eoi           = vmx_handle_eoi,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
+    .enable_msr_exit_interception = vmx_enable_msr_exit_interception,
 };
 
 const struct hvm_function_table * __init start_vmx(void)
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index ba7e71e..fdd5ff6 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -587,6 +587,7 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         switch( mec->op )
         {
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
+        case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION:
         {
             rc = -ENODEV;
             /* Only HAP is supported */
@@ -600,13 +601,23 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
             rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
                                     HVM_PARAM_ACCESS_RING_PFN,
                                     mem_access_notification);
+
+            if ( mec->op != XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE &&
+                 rc == 0 && hvm_funcs.enable_msr_exit_interception )
+            {
+                d->arch.hvm_domain.introspection_enabled = 1;
+                hvm_funcs.enable_msr_exit_interception(d);
+            }
         }
         break;
 
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
         {
             if ( med->ring_page )
+            {
                 rc = mem_event_disable(d, med);
+                d->arch.hvm_domain.introspection_enabled = 0;
+            }
         }
         break;
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 291a2e0..30d4aa3 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -134,6 +134,7 @@ struct hvm_domain {
     bool_t                 mem_sharing_enabled;
     bool_t                 qemu_mapcache_invalidate;
     bool_t                 is_s3_suspended;
+    bool_t                 introspection_enabled;
 
     /*
      * TSC value that VCPUs use to calculate their tsc_offset value.
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0ebd478..069ba39 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -205,6 +205,8 @@ struct hvm_function_table {
     void (*hypervisor_cpuid_leaf)(uint32_t sub_idx,
                                   uint32_t *eax, uint32_t *ebx,
                                   uint32_t *ecx, uint32_t *edx);
+
+    void (*enable_msr_exit_interception)(struct domain *d);
 };
 
 extern struct hvm_function_table hvm_funcs;
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 215d93c..6a99dca 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -471,6 +471,13 @@ enum vmcs_field {
     HOST_RIP                        = 0x00006c16,
 };
 
+/*
+ * A set of MSR-s that need to be enabled for memory introspection
+ * to work.
+ */
+extern const u32 vmx_introspection_force_enabled_msrs[];
+extern const unsigned int vmx_introspection_force_enabled_msrs_size;
+
 #define VMCS_VPID_WIDTH 16
 
 #define MSR_TYPE_R 1
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..a3431ec 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -772,10 +772,11 @@ struct xen_domctl_gdbsx_domstatus {
  * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
  * EBUSY  - guest has or had access enabled, ring buffer still active
  */
-#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS            2
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS                        2
 
-#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE     0
-#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE    1
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE                 0
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE                1
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION   2
 
 /*
  * Sharing ENOMEM helper.
-- 
1.7.9.5

* [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-28 11:47 [PATCH RFC V9 1/5] xen: Emulate with no writes Razvan Cojocaru
  2014-08-28 11:47 ` [PATCH RFC V9 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
  2014-08-28 11:48 ` [PATCH RFC V9 3/5] xen, libxc: Force-enable relevant MSR events Razvan Cojocaru
@ 2014-08-28 11:48 ` Razvan Cojocaru
  2014-08-28 12:03   ` Jan Beulich
  2014-08-28 11:48 ` [PATCH RFC V9 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
  3 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 11:48 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	jun.nakajima, andrew.cooper3, eddie.dong, tim, JBeulich,
	ian.jackson

Added a new XEN_DOMCTL_request_pagefault hypercall, used by libxc's
new xc_domain_request_pagefault() function to set per-VCPU page fault
injection information. The call itself merely records that information;
nothing actually happens until VMENTRY time, and then only if all
conditions are met (the guest is in user mode, the requested address
space matches CR3, and there are no other pending traps).
This mechanism allows bringing in swapped-out pages for inspection.
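
For illustration, a monitoring application might request an injection
like this (a sketch only; xch, domid, cr3 and va are assumed to be set
up by the caller):

    xen_domctl_request_pagefault_info_t info = {
        .address_space   = cr3, /* CR3 of the target process */
        .virtual_address = va,  /* swapped-out page to bring in */
        .errcode         = 0    /* Xen ORs in PFEC_user_mode */
    };

    if ( xc_domain_request_pagefault(xch, domid, 0 /* vcpu */, &info) )
        perror("xc_domain_request_pagefault");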

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>

---
Changes since V8:
 - Injection is now VCPU-based instead of domain-based.

Changes since V7:
 - Removed pointless parentheses and curly braces.
 - Renamed xc_domain_set_pagefault_info() to
   xc_domain_request_pagefault().

Changes since V6:
 - Removed the write_access flag, now passing the whole error code
   (still OR-ed with PFEC_user_mode).
 - Replaced hex constants with #defined macros.
 - Minor code / coding style cleanup.

Changes since V5:
 - Renamed "struct domain *d" to "currd" where appropriate.
 - Using ! to test bool_t variables.
 - Replaced hvm_get_segment_register() with
   vmx_get_segment_register() in vmx.c.
 - Removed double blank line.
 - Proper comparison of CR3 and address_space (taking into account
   LM, PAE).
 - Using domain variable already set up in do_domctl().
 - Removed optional parentheses.
 - Removed d->arch.hvm_domain.fault_info reset (only the "valid"
   flag counts anyway).

Changes since V4:
 - Added "valid" flag instead of relying on virtual address 0 as a
   special case.
 - Using #defines instead of magic constants in vmx_vmcs_save().
 - Padded struct xen_domctl_set_pagefault_info.
 - Coding style and readability changes.

Changes since V3:
 - Now checking that is_hvm_domain(d) in do_domctl() (consistent with
   the test in the actual page fault injection code).
 - XEN_DOMCTL_set_pagefault_info now extends the proper range.
 - Added a likely() around the
   d->arch.hvm_domain.fault_info.virtual_address == 0 check.
 - Replaced hvm_funcs.save_cpu_ctxt() function call with lighter code.
 - Split the vmx.c page fault injection function into a check
   function and an inject function.

Changes since V2:
 - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
 - Now checking SS.DPL instead of CS.DPL to see if the guest is in
   user mode.
 - Removed superfluous fault_info members initialization.
 - Now returning -EINVAL if !has_hvm_container_domain(d) on
   XEN_DOMCTL_set_pagefault_info.
 - Moved struct fault_info from struct domain to struct hvm_domain.
 - Collapsed explicit initialization of the fault_info members into a
   one-line assignment on the tools/libxc side.
 - Now using write_access as a proper bool instead of shifting and
   OR-ing it with PFEC_user_mode.
---
 tools/libxc/xc_domain.c      |   18 ++++++++++++++
 tools/libxc/xenctrl.h        |    5 ++++
 xen/arch/x86/hvm/vmx/vmx.c   |   56 ++++++++++++++++++++++++++++++++++++++++++
 xen/common/domctl.c          |   22 +++++++++++++++++
 xen/include/asm-x86/domain.h |    9 +++++++
 xen/include/public/domctl.h  |   17 +++++++++++++
 6 files changed, 127 insertions(+)

diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index c67ac9a..f880666 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -507,6 +507,24 @@ int xc_domain_hvm_setcontext(xc_interface *xch,
     return ret;
 }
 
+int xc_domain_request_pagefault(xc_interface *xch,
+                                uint32_t domid,
+                                uint32_t vcpu,
+                                xen_domctl_request_pagefault_info_t *info)
+{
+    DECLARE_DOMCTL;
+
+    if (info == NULL)
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_request_pagefault;
+    domctl.domain = (domid_t)domid;
+    domctl.u.request_pagefault_info = *info;
+    domctl.u.vcpucontext.vcpu = (uint16_t)vcpu;
+
+    return do_domctl(xch, &domctl);
+}
+
 int xc_vcpu_getcontext(xc_interface *xch,
                        uint32_t domid,
                        uint32_t vcpu,
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 28b5562..7e25c65 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -803,6 +803,11 @@ int xc_domain_hvm_setcontext(xc_interface *xch,
 const char *xc_domain_get_native_protocol(xc_interface *xch,
                                           uint32_t domid);
 
+int xc_domain_request_pagefault(xc_interface *xch,
+                                uint32_t domid,
+                                uint32_t vcpu,
+                                xen_domctl_request_pagefault_info_t *info);
+
 /**
  * This function returns information about the execution context of a
  * particular vcpu of a domain.
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b80f017..f1e9097 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3108,6 +3108,59 @@ out:
         nvmx_idtv_handling();
 }
 
+static bool_t vmx_check_pf_injection(void)
+{
+    struct vcpu *curr = current;
+    const struct domain *currd = curr->domain;
+    struct segment_register seg;
+    unsigned long ev;
+    uint32_t pending_event = 0;
+    uint64_t mask;
+
+    if ( !is_hvm_domain(currd) ||
+         likely(!curr->arch.pagefault_request.valid) )
+        return 0;
+
+    vmx_get_segment_register(curr, x86_seg_ss, &seg);
+
+    if ( seg.attr.fields.dpl != 3 ) /* Guest is not in user mode */
+        return 0;
+
+    if ( hvm_long_mode_enabled(curr) )
+        mask = PADDR_MASK & PAGE_MASK; /* Bits 51:12. */
+    else if ( hvm_pae_enabled(curr) )
+        mask = 0x00000000ffffffe0; /* Bits 31:5. */
+    else
+        mask = (uint32_t)PAGE_MASK; /* Bits 31:12. */
+
+    if ( (curr->arch.hvm_vcpu.guest_cr[3] & mask) !=
+         (curr->arch.pagefault_request.address_space & mask) )
+        return 0;
+
+    vmx_vmcs_enter(curr);
+    __vmread(VM_ENTRY_INTR_INFO, &ev);
+    vmx_vmcs_exit(curr);
+
+    if ( (ev & INTR_INFO_VALID_MASK) &&
+         hvm_event_needs_reinjection(MASK_EXTR(ev, INTR_INFO_INTR_TYPE_MASK),
+                                     ev & INTR_INFO_VECTOR_MASK) )
+        pending_event = ev;
+
+    return pending_event == 0;
+}
+
+static void vmx_inject_pf(void)
+{
+    struct vcpu *curr = current;
+    uint64_t virtual_address =
+        curr->arch.pagefault_request.virtual_address;
+
+    curr->arch.pagefault_request.valid = 0;
+
+    hvm_inject_page_fault(curr->arch.pagefault_request.errcode |
+                          PFEC_user_mode, virtual_address);
+}
+
 void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3148,6 +3201,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(need_flush) )
         vpid_sync_all();
 
+    if ( vmx_check_pf_injection() )
+        vmx_inject_pf();
+
  out:
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index c326aba..87b5d8e 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -967,6 +967,28 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_request_pagefault:
+    {
+        unsigned int vcpu = op->u.vcpucontext.vcpu;
+        struct vcpu *v;
+
+        ret = -EINVAL;
+        if ( vcpu >= d->max_vcpus || (v = d->vcpu[vcpu]) == NULL ||
+             !is_hvm_domain(d) )
+            break;
+
+        v->arch.pagefault_request.address_space =
+            op->u.request_pagefault_info.address_space;
+        v->arch.pagefault_request.virtual_address =
+            op->u.request_pagefault_info.virtual_address;
+        v->arch.pagefault_request.errcode =
+            op->u.request_pagefault_info.errcode;
+        v->arch.pagefault_request.valid = 1;
+
+        ret = 0;
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 83329ed..0e0ec4b 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -458,6 +458,15 @@ struct arch_vcpu
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
+
+    /* Memory introspection page fault injection data. */
+    struct {
+        uint64_t address_space;
+        uint64_t virtual_address;
+        uint32_t errcode;
+        bool_t valid;
+    } pagefault_request;
+
 } __cacheline_aligned;
 
 smap_check_policy_t smap_policy_change(struct vcpu *v,
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index a3431ec..69224be 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -937,6 +937,21 @@ typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
 #endif
 
+/*
+ * XEN_DOMCTL_request_pagefault requests that a page fault be injected
+ * at the next VMENTRY.
+ */
+struct xen_domctl_request_pagefault_info {
+    uint64_t address_space;
+    uint64_t virtual_address;
+    uint32_t errcode;
+    uint32_t _pad;
+};
+typedef struct xen_domctl_request_pagefault_info
+    xen_domctl_request_pagefault_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_request_pagefault_info_t);
+
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -1009,6 +1024,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_get_vcpu_msrs                 72
 #define XEN_DOMCTL_set_vcpu_msrs                 73
+#define XEN_DOMCTL_request_pagefault             74
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1069,6 +1085,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_request_pagefault_info request_pagefault_info;
         uint8_t                             pad[128];
     } u;
 };
-- 
1.7.9.5

* [PATCH RFC V9 5/5] xen: Handle resumed instruction based on previous mem_event reply
  2014-08-28 11:47 [PATCH RFC V9 1/5] xen: Emulate with no writes Razvan Cojocaru
                   ` (2 preceding siblings ...)
  2014-08-28 11:48 ` [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
@ 2014-08-28 11:48 ` Razvan Cojocaru
  2014-08-28 12:09   ` Jan Beulich
  3 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 11:48 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	jun.nakajima, andrew.cooper3, eddie.dong, tim, JBeulich,
	ian.jackson

In a scenario where a page fault that triggered a mem_event occurred,
p2m_mem_access_check() will now be able to either 1) emulate the
current instruction, or 2) emulate it but disallow any memory writes.
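
For illustration, the consumer side simply sets the new flags in its
reply (a sketch only; request/response ring handling is assumed):

    /* Ask Xen to emulate the faulting instruction on resume, but
     * discard any memory writes it would have performed. */
    rsp.flags |= MEM_EVENT_FLAG_EMULATE | MEM_EVENT_FLAG_EMULATE_NOWRITE;
    /* ... put rsp on the ring, then resume the domain ... */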

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

---
Changes since V8:
 - Make sure that the exit reason is EXIT_REASON_EPT_VIOLATION
   before checking for EPT_GLA_FAULT.

Changes since V7:
 - Now calling hvm_mem_event_emulate_one() instead of
   hvm_emulate_one_full() (renamed in the first patch).

Changes since V5:
 - Fixed indentation.
 - Re-ordered a set of if() conditions.

Changes since V4:
 - Renamed "exited_by_pagefault" to "exited_by_nested_pagefault".
 - Folded two long lines.
 - Combined two "if" statements.
 - Moved "violation = 0;" to the XENMEM_access_rwx case.

Changes since V3:
 - Collapsed verbose lines into a single "else if()".
 - Changed an int to bool_t.
 - Fixed a minor coding style issue.
 - Now computing the first parameter to hvm_emulate_one_full()
   (replaced an "if" with a single call).
 - Added code comments about eip and gla reset (clarity issue).
 - Removed duplicate code by Andrew Cooper (introduced in V2,
   since committed).

Changes since V2:
 - Moved the __vmread(EXIT_QUALIFICATION, &exit_qualification); code
   in vmx.c, accessible via hvm_funcs.
 - Incorporated changes by Andrew Cooper ("[PATCH 1/2] Xen/mem_event:
   Validate the response vcpu_id before acting on it."

Changes since V1:
 - Removed the 'skip' code which required computing the current
   instruction length.
 - Removed the set_ad_bits() code that attempted to modify the
   'accessed' and 'dirty' bits for instructions that the emulator
   can't handle at the moment.
---
 xen/arch/x86/domain.c          |    3 ++
 xen/arch/x86/hvm/vmx/vmx.c     |   18 +++++++++
 xen/arch/x86/mm/p2m.c          |   84 ++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/domain.h   |    8 ++++
 xen/include/asm-x86/hvm/hvm.h  |    2 +
 xen/include/public/mem_event.h |   12 +++---
 6 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f7e0e78..7b1dfe6 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -415,6 +415,9 @@ int vcpu_initialise(struct vcpu *v)
 
     v->arch.flags = TF_kernel_mode;
 
+    /* By default, do not emulate */
+    v->arch.mem_event.emulate_flags = 0;
+
     rc = mapcache_vcpu_init(v);
     if ( rc )
         return rc;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index f1e9097..8983e70 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1694,6 +1694,23 @@ static void vmx_enable_msr_exit_interception(struct domain *d)
                                          MSR_TYPE_W);
 }
 
+static bool_t vmx_exited_by_nested_pagefault(void)
+{
+    unsigned long exit_qualification, exit_reason;
+
+    __vmread(VM_EXIT_REASON, &exit_reason);
+
+    if ( exit_reason != EXIT_REASON_EPT_VIOLATION )
+        return 0;
+
+    __vmread(EXIT_QUALIFICATION, &exit_qualification);
+
+    if ( (exit_qualification & EPT_GLA_FAULT) == 0 )
+        return 0;
+
+    return 1;
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -1753,6 +1770,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
     .enable_msr_exit_interception = vmx_enable_msr_exit_interception,
+    .exited_by_nested_pagefault  = vmx_exited_by_nested_pagefault,
 };
 
 const struct hvm_function_table * __init start_vmx(void)
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 41a316a..3ef3c81 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1391,6 +1391,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
     p2m_access_t p2ma;
     mem_event_request_t *req;
     int rc;
+    unsigned long eip = guest_cpu_user_regs()->eip;
 
     /* First, handle rx2rw conversion automatically.
      * These calls to p2m->set_entry() must succeed: we have the gfn
@@ -1443,6 +1444,36 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
             return 1;
         }
     }
+    else if ( v->arch.mem_event.emulate_flags == 0 &&
+              hvm_funcs.exited_by_nested_pagefault &&
+              !hvm_funcs.exited_by_nested_pagefault() ) /* don't send a mem_event */
+    {
+        v->arch.mem_event.emulate_flags = MEM_EVENT_FLAG_EMULATE;
+        v->arch.mem_event.gpa = gpa;
+        v->arch.mem_event.eip = eip;
+    }
+
+    /* The previous mem_event reply does not match the current state. */
+    if ( v->arch.mem_event.gpa != gpa || v->arch.mem_event.eip != eip )
+    {
+        /* Don't emulate the current instruction, send a new mem_event. */
+        v->arch.mem_event.emulate_flags = 0;
+
+        /* Make sure to mark the current state to match it again against
+         * the new mem_event about to be sent. */
+        v->arch.mem_event.gpa = gpa;
+        v->arch.mem_event.eip = eip;
+    }
+
+    if ( v->arch.mem_event.emulate_flags )
+    {
+        hvm_mem_event_emulate_one((v->arch.mem_event.emulate_flags &
+                                   MEM_EVENT_FLAG_EMULATE_NOWRITE) != 0,
+                                  TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
+
+        v->arch.mem_event.emulate_flags = 0;
+        return 1;
+    }
 
     *req_ptr = NULL;
     req = xzalloc(mem_event_request_t);
@@ -1495,6 +1526,59 @@ void p2m_mem_access_resume(struct domain *d)
 
         v = d->vcpu[rsp.vcpu_id];
 
+        /* Mark vcpu for skipping one instruction upon rescheduling */
+        if ( rsp.flags & MEM_EVENT_FLAG_EMULATE )
+        {
+            xenmem_access_t access;
+            bool_t violation = 1;
+
+            v->arch.mem_event.emulate_flags = 0;
+
+            if ( p2m_get_mem_access(d, rsp.gfn, &access) == 0 )
+            {
+                switch ( access )
+                {
+                case XENMEM_access_n:
+                case XENMEM_access_n2rwx:
+                default:
+                    violation = rsp.access_r || rsp.access_w || rsp.access_x;
+                    break;
+
+                case XENMEM_access_r:
+                    violation = rsp.access_w || rsp.access_x;
+                    break;
+
+                case XENMEM_access_w:
+                    violation = rsp.access_r || rsp.access_x;
+                    break;
+
+                case XENMEM_access_x:
+                    violation = rsp.access_r || rsp.access_w;
+                    break;
+
+                case XENMEM_access_rx:
+                case XENMEM_access_rx2rw:
+                    violation = rsp.access_w;
+                    break;
+
+                case XENMEM_access_wx:
+                    violation = rsp.access_r;
+                    break;
+
+                case XENMEM_access_rw:
+                    violation = rsp.access_x;
+                    break;
+
+                case XENMEM_access_rwx:
+                    violation = 0;
+                    break;
+                }
+            }
+
+            if ( violation )
+                v->arch.mem_event.emulate_flags = rsp.flags;
+        }
+
         /* Unpause domain */
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
             mem_event_vcpu_unpause(v);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 0e0ec4b..9c599d1 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -467,6 +467,14 @@ struct arch_vcpu
         bool_t valid;
     } pagefault_request;
 
+    /* Should we emulate the next matching instruction on VCPU resume
+     * after a mem_event? */
+    struct {
+        uint32_t emulate_flags;
+        unsigned long gpa;
+        unsigned long eip;
+    } mem_event;
+
 } __cacheline_aligned;
 
 smap_check_policy_t smap_policy_change(struct vcpu *v,
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 069ba39..8f0292c 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -207,6 +207,8 @@ struct hvm_function_table {
                                   uint32_t *ecx, uint32_t *edx);
 
     void (*enable_msr_exit_interception)(struct domain *d);
+
+    bool_t (*exited_by_nested_pagefault)(void);
 };
 
 extern struct hvm_function_table hvm_funcs;
diff --git a/xen/include/public/mem_event.h b/xen/include/public/mem_event.h
index bbd1636..49f1b84 100644
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -31,11 +31,13 @@
 #include "io/ring.h"
 
 /* Memory event flags */
-#define MEM_EVENT_FLAG_VCPU_PAUSED  (1 << 0)
-#define MEM_EVENT_FLAG_DROP_PAGE    (1 << 1)
-#define MEM_EVENT_FLAG_EVICT_FAIL   (1 << 2)
-#define MEM_EVENT_FLAG_FOREIGN      (1 << 3)
-#define MEM_EVENT_FLAG_DUMMY        (1 << 4)
+#define MEM_EVENT_FLAG_VCPU_PAUSED     (1 << 0)
+#define MEM_EVENT_FLAG_DROP_PAGE       (1 << 1)
+#define MEM_EVENT_FLAG_EVICT_FAIL      (1 << 2)
+#define MEM_EVENT_FLAG_FOREIGN         (1 << 3)
+#define MEM_EVENT_FLAG_DUMMY           (1 << 4)
+#define MEM_EVENT_FLAG_EMULATE         (1 << 5)
+#define MEM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6)
 
 /* Reasons for the memory event request */
 #define MEM_EVENT_REASON_UNKNOWN     0    /* typical reason */
-- 
1.7.9.5

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-28 11:48 ` [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
@ 2014-08-28 12:03   ` Jan Beulich
  2014-08-28 12:08     ` Razvan Cojocaru
  0 siblings, 1 reply; 35+ messages in thread
From: Jan Beulich @ 2014-08-28 12:03 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 28.08.14 at 13:48, <rcojocaru@bitdefender.com> wrote:
> +    case XEN_DOMCTL_request_pagefault:
> +    {
> +        unsigned int vcpu = op->u.vcpucontext.vcpu;

So you're using two different structures of the union - how can
that possibly work? You've got a 32-bi padding field, which you can
easily use to indicate the desired vCPU. Apart from that I'm not
seeing how your intended "any vCPU" is now getting handled.

Jan

> +        struct vcpu *v;
> +
> +        ret = -EINVAL;
> +        if ( vcpu >= d->max_vcpus || (v = d->vcpu[vcpu]) == NULL ||
> +             !is_hvm_domain(d) )
> +            break;
> +
> +        v->arch.pagefault_request.address_space =
> +            op->u.request_pagefault_info.address_space;
> +        v->arch.pagefault_request.virtual_address =
> +            op->u.request_pagefault_info.virtual_address;
> +        v->arch.pagefault_request.errcode =
> +            op->u.request_pagefault_info.errcode;
> +        v->arch.pagefault_request.valid = 1;
> +
> +        ret = 0;
> +    }
> +    break;

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-28 12:03   ` Jan Beulich
@ 2014-08-28 12:08     ` Razvan Cojocaru
  2014-08-28 12:11       ` Jan Beulich
  0 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 12:08 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 08/28/2014 03:03 PM, Jan Beulich wrote:
>>>> On 28.08.14 at 13:48, <rcojocaru@bitdefender.com> wrote:
>> +    case XEN_DOMCTL_request_pagefault:
>> +    {
>> +        unsigned int vcpu = op->u.vcpucontext.vcpu;
> 
> So you're using two different structures of the union - how can
> that possibly work? You've got a 32-bit padding field, which you can
> easily use to indicate the desired vCPU. Apart from that I'm not
> seeing how your intended "any vCPU" is now getting handled.

Sorry about that, started from a copy / paste bug from another domctl
case. I'll add the vcpu field to our struct and use that.
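
For reference, the adjusted public structure would then presumably look
something like this (a sketch only, not applied code):

    struct xen_domctl_request_pagefault_info {
        uint64_t address_space;
        uint64_t virtual_address;
        uint32_t errcode;
        uint32_t vcpu;       /* replaces the _pad field */
    };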

Not sure I follow the second part of your comment. Assuming the union
problem is fixed, is this not what you meant about handling the page
fault injection VCPU-based vs domain-based?


Thanks,
Razvan Cojocaru

* Re: [PATCH RFC V9 5/5] xen: Handle resumed instruction based on previous mem_event reply
  2014-08-28 11:48 ` [PATCH RFC V9 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
@ 2014-08-28 12:09   ` Jan Beulich
  0 siblings, 0 replies; 35+ messages in thread
From: Jan Beulich @ 2014-08-28 12:09 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 28.08.14 at 13:48, <rcojocaru@bitdefender.com> wrote:
> +static bool_t vmx_exited_by_nested_pagefault(void)
> +{
> +    unsigned long exit_qualification, exit_reason;
> +
> +    __vmread(VM_EXIT_REASON, &exit_reason);
> +
> +    if ( exit_reason != EXIT_REASON_EPT_VIOLATION )
> +        return 0;

Actually, looking at this again I think an assertion would have been
the better choice here, as the only code path leading here should
be guaranteeing this to be the case. But anyway, with this needing
re-basing on top of Tamas'es patches, this may go away entirely.

Jan

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-28 12:08     ` Razvan Cojocaru
@ 2014-08-28 12:11       ` Jan Beulich
  2014-08-28 12:23         ` Razvan Cojocaru
                           ` (2 more replies)
  0 siblings, 3 replies; 35+ messages in thread
From: Jan Beulich @ 2014-08-28 12:11 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 28.08.14 at 14:08, <rcojocaru@bitdefender.com> wrote:
> On 08/28/2014 03:03 PM, Jan Beulich wrote:
>>>>> On 28.08.14 at 13:48, <rcojocaru@bitdefender.com> wrote:
>>> +    case XEN_DOMCTL_request_pagefault:
>>> +    {
>>> +        unsigned int vcpu = op->u.vcpucontext.vcpu;
>> 
>> So you're using two different structures of the union - how can
>> that possibly work? You've got a 32-bit padding field, which you can
>> easily use to indicate the desired vCPU. Apart from that I'm not
>> seeing how your intended "any vCPU" is now getting handled.
> 
> Sorry about that, started from a copy / paste bug from another domctl
> case. I'll add the vcpu field to our struct and use that.
> 
> Not sure I follow the second part of your comment. Assuming the union
> problem is fixed, is this not what you meant about handling the page
> fault injection VCPU-based vs domain-based?

It is, but you said you want an "I don't care on which vCPU"
special case. In fact with your previous explanations I could
see you getting into trouble if on a large guest you'd have to
wait for one particular CPU to get to carry out the desired
swap-in.

Jan


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-28 12:11       ` Jan Beulich
@ 2014-08-28 12:23         ` Razvan Cojocaru
  2014-08-28 12:37         ` Razvan Cojocaru
  2014-08-29  7:44         ` Razvan Cojocaru
  2 siblings, 0 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 12:23 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 08/28/2014 03:11 PM, Jan Beulich wrote:
>>>> On 28.08.14 at 14:08, <rcojocaru@bitdefender.com> wrote:
>> On 08/28/2014 03:03 PM, Jan Beulich wrote:
>>>>>> On 28.08.14 at 13:48, <rcojocaru@bitdefender.com> wrote:
>>>> +    case XEN_DOMCTL_request_pagefault:
>>>> +    {
>>>> +        unsigned int vcpu = op->u.vcpucontext.vcpu;
>>>
>>> So you're using two different structures of the union - how can
>>> that possibly work? You've got a 32-bit padding field, which you can
>>> easily use to indicate the desired vCPU. Apart from that I'm not
>>> seeing how your intended "any vCPU" is now getting handled.
>>
>> Sorry about that, started from a copy / paste bug from another domctl
>> case. I'll add the vcpu field to our struct and use that.
>>
>> Not sure I follow the second part of your comment. Assuming the union
>> problem is fixed, is this not what you meant about handling the page
>> fault injection VCPU-based vs domain-based?
> 
> It is, but you said you want an "I don't care on which vCPU"
> special case. In fact with your previous explanations I could
> see you getting into trouble if on a large guest you'd have to
> wait for one particular CPU to get to carry out the desired
> swap-in.

I do - the code simply uses VCPU 0 for this now. The delay might indeed
be a problem (though not a huge one, I'll have to check), but I'm not
sure how else to strike a balance between the special case we need
(which is fine for a mem_event-based application) and the general case
(where there might not be any restrictions on the client).


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-28 12:11       ` Jan Beulich
  2014-08-28 12:23         ` Razvan Cojocaru
@ 2014-08-28 12:37         ` Razvan Cojocaru
  2014-08-29  7:44         ` Razvan Cojocaru
  2 siblings, 0 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-28 12:37 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 08/28/2014 03:11 PM, Jan Beulich wrote:
>>>> On 28.08.14 at 14:08, <rcojocaru@bitdefender.com> wrote:
>> On 08/28/2014 03:03 PM, Jan Beulich wrote:
>>>>>> On 28.08.14 at 13:48, <rcojocaru@bitdefender.com> wrote:
>>>> +    case XEN_DOMCTL_request_pagefault:
>>>> +    {
>>>> +        unsigned int vcpu = op->u.vcpucontext.vcpu;
>>>
>>> So you're using two different structures of the union - how can
>>> that possibly work? You've got a 32-bit padding field, which you can
>>> easily use to indicate the desired vCPU. Apart from that I'm not
>>> seeing how your intended "any vCPU" is now getting handled.
>>
>> Sorry about that, started from a copy / paste bug from another domctl
>> case. I'll add the vcpu field to our struct and use that.
>>
>> Not sure I follow the second part of your comment. Assuming the union
>> problem is fixed, is this not what you meant about handling the page
>> fault injection VCPU-based vs domain-based?
> 
> It is, but you said you want an "I don't care on which vCPU"
> special case. In fact with your previous explanations I could
> see you getting into trouble if on a large guest you'd have to
> wait for one particular CPU to get to carry out the desired
> swap-in.

Yes, on second thought I'll revert this patch - it's indeed less than
ideal for the use case the new domctl was added for. I also need to address
Kevin's suggestion to rename the PF functions.


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-28 12:11       ` Jan Beulich
  2014-08-28 12:23         ` Razvan Cojocaru
  2014-08-28 12:37         ` Razvan Cojocaru
@ 2014-08-29  7:44         ` Razvan Cojocaru
  2014-08-29  9:27           ` Jan Beulich
  2 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-08-29  7:44 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 08/28/2014 03:11 PM, Jan Beulich wrote:
>>>> On 28.08.14 at 14:08, <rcojocaru@bitdefender.com> wrote:
>> On 08/28/2014 03:03 PM, Jan Beulich wrote:
>>>>>> On 28.08.14 at 13:48, <rcojocaru@bitdefender.com> wrote:
>>>> +    case XEN_DOMCTL_request_pagefault:
>>>> +    {
>>>> +        unsigned int vcpu = op->u.vcpucontext.vcpu;
>>>
>>> So you're using two different structures of the union - how can
>>> that possibly work? You've got a 32-bit padding field, which you can
>>> easily use to indicate the desired vCPU. Apart from that I'm not
>>> seeing how your intended "any vCPU" is now getting handled.
>>
>> Sorry about that, started from a copy / paste bug from another domctl
>> case. I'll add the vcpu field to our struct and use that.
>>
>> Not sure I follow the second part of your comment. Assuming the union
>> problem is fixed, is this not what you meant about handling the page
>> fault injection VCPU-based vs domain-based?
> 
> It is, but you said you want an "I don't care on which vCPU"
> special case. In fact with your previous explanations I could
> see you getting into trouble if on a large guest you'd have to
> wait for one particular CPU to get to carry out the desired
> swap-in.

I've double-checked what the memory introspection engine requires, and
(thank you for the comment!) it looks like indeed having the PF
injection info per-VCPU is not only suboptimal, but possibly useless for
our purpose (at least when using it with a particular VCPU - such as
VCPU 0), precisely for the reasons you've indicated: because VCPU 0
might actually never run code in the destination virtual space.

Having the PF injection information per-domain ensures that at least one
VCPU ends up triggering the actual page fault. To answer Kevin's
question, whether more than one VCPU ends up doing that occasionally is
not a problem from the memory introspection engine's point of view, and
AFAIK it should not be a problem for a guest OS.

So I've gotten a bit ahead of myself with the change in V9, and this is
why I've reverted to the original behaviour in V10.

I do understand the preference for a VCPU-based mechanism from a
concurrency point of view, but that would simply potentially fail for
us, hence defeating the purpose of the patch. I'm also not sure how that
would be useful in the general case either, since the same problem that
applies to us would seem to apply to the general case as well.


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-29  7:44         ` Razvan Cojocaru
@ 2014-08-29  9:27           ` Jan Beulich
  2014-09-01  7:36             ` Razvan Cojocaru
  0 siblings, 1 reply; 35+ messages in thread
From: Jan Beulich @ 2014-08-29  9:27 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 29.08.14 at 09:44, <rcojocaru@bitdefender.com> wrote:
> I do understand the preference for a VCPU-based mechanism from a
> concurrency point of view, but that would simply potentially fail for
> us, hence defeating the purpose of the patch. I'm also not sure how that
> would be useful in the general case either, since the same problem that
> applies to us would seem to apply to the general case as well.

Yeah, the whole thing probably needs a bit more thinking so that the
interface doesn't end up being a BitDefender-special. Indeed together
with the address space qualification, the interface might not be very
useful when made vCPU-bound. And taking it a little further into the
"generic" direction, allowing this to only inject #PF doesn't make a
very nice interface either. Plus we already have HVMOP_inject_trap,
i.e. your first line of thinking (and eventual explaining as the
motivation for a patch) should be why that can't be used.

Jan


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-29  9:27           ` Jan Beulich
@ 2014-09-01  7:36             ` Razvan Cojocaru
  2014-09-01  9:08               ` Jan Beulich
  0 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-01  7:36 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 08/29/2014 12:27 PM, Jan Beulich wrote:
>>>> On 29.08.14 at 09:44, <rcojocaru@bitdefender.com> wrote:
>> I do understand the preference for a VCPU-based mechanism from a
>> concurrency point of view, but that would simply potentially fail for
>> us, hence defeating the purpose of the patch. I'm also not sure how that
>> would be useful in the general case either, since the same problem that
>> applies to us would seem to apply to the general case as well.
> 
> Yeah, the whole thing probably needs a bit more thinking so that the
> interface doesn't end up being a BitDefender-special. Indeed together
> with the address space qualification, the interface might not be very
> useful when made vCPU-bound. And taking it a little further into the
> "generic" direction, allowing this to only inject #PF doesn't make a
> very nice interface either. Plus we already have HVMOP_inject_trap,
> i.e. your first line of thinking (and eventual explaining as the
> motivation for a patch) should be why that can't be used.

I'd say that it's memory-introspection specific rather than 3rd-party
vendor specific. Without this patch, memory-introspection support
in general is impacted / less flexible, since there's no other way to
bring swapped out pages back in.

For all the reasons you've explained (at least as far as I understand
it) there's not much room to go more generic - so maybe just renaming
the libxc wrapper to something more specific (
xc_domain_request_usermode_pagefault?) is the solution here?


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-01  7:36             ` Razvan Cojocaru
@ 2014-09-01  9:08               ` Jan Beulich
  2014-09-01 11:54                 ` Razvan Cojocaru
  0 siblings, 1 reply; 35+ messages in thread
From: Jan Beulich @ 2014-09-01  9:08 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 01.09.14 at 09:36, <rcojocaru@bitdefender.com> wrote:
> On 08/29/2014 12:27 PM, Jan Beulich wrote:
>>>>> On 29.08.14 at 09:44, <rcojocaru@bitdefender.com> wrote:
>>> I do understand the preference for a VCPU-based mechanism from a
>>> concurrency point of view, but that would simply potentially fail for
>>> us, hence defeating the purpose of the patch. I'm also not sure how that
>>> would be useful in the general case either, since the same problem that
>>> applies to us would seem to apply to the general case as well.
>> 
>> Yeah, the whole thing probably needs a bit more thinking so that the
>> interface doesn't end up being a BitDefender-special. Indeed together
>> with the address space qualification, the interface might not be very
>> useful when made vCPU-bound. And taking it a little further into the
>> "generic" direction, allowing this to only inject #PF doesn't make a
>> very nice interface either. Plus we already have HVMOP_inject_trap,
>> i.e. your first line of thinking (and eventual explaining as the
>> motivation for a patch) should be why that can't be used.
> 
> I'd say that it's memory-introspection specific rather than 3rd-party
> vendor specific. Without this this patch, memory-introspection support
> in general is impacted / less flexible, since there's no other way to
> bring swapped out pages back in.
> 
> For all the reasons you've explained (at least as far as I understand
> it) there's not much room to go more generic - so maybe just renaming
> the libxc wrapper to something more specific (
> xc_domain_request_usermode_pagefault?) is the solution here?

Maybe, but only after you explained why the existing interface can
neither be used nor suitably extended.

Jan


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-01  9:08               ` Jan Beulich
@ 2014-09-01 11:54                 ` Razvan Cojocaru
  2014-09-01 12:05                   ` Jan Beulich
  0 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-01 11:54 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 09/01/2014 12:08 PM, Jan Beulich wrote:
>>>> On 01.09.14 at 09:36, <rcojocaru@bitdefender.com> wrote:
>> On 08/29/2014 12:27 PM, Jan Beulich wrote:
>>>>>> On 29.08.14 at 09:44, <rcojocaru@bitdefender.com> wrote:
>>>> I do understand the preference for a VCPU-based mechanism from a
>>>> concurrency point of view, but that would simply potentially fail for
>>>> us, hence defeating the purpose of the patch. I'm also not sure how that
>>>> would be useful in the general case either, since the same problem that
>>>> applies to us would seem to apply to the general case as well.
>>>
>>> Yeah, the whole thing probably needs a bit more thinking so that the
>>> interface doesn't end up being a BitDefender-special. Indeed together
>>> with the address space qualification, the interface might not be very
>>> useful when made vCPU-bound. And taking it a little further into the
>>> "generic" direction, allowing this to only inject #PF doesn't make a
>>> very nice interface either. Plus we already have HVMOP_inject_trap,
>>> i.e. your first line of thinking (and eventual explaining as the
>>> motivation for a patch) should be why that can't be used.
>>
>> I'd say that it's memory-introspection specific rather than 3rd-party
>> vendor specific. Without this patch, memory-introspection support
>> in general is impacted / less flexible, since there's no other way to
>> bring swapped out pages back in.
>>
>> For all the reasons you've explained (at least as far as I understand
>> it) there's not much room to go more generic - so maybe just renaming
>> the libxc wrapper to something more specific (
>> xc_domain_request_usermode_pagefault?) is the solution here?
> 
> Maybe, but only after you explained why the existing interface can
> neither be used nor suitably extended.

As far as I understand the HVMOP_inject_trap interface, it is simply (in
this case) a way to trigger the equivalent of hvm_inject_page_fault()
from userspace (via libxc).

We need two additional checks:

1. That CR3 matches, because the way the application works, we need to
inject a page fault related to the address space of whatever process is
interesting at the time, and

2. That we're in usermode (the CPL check), because we know that it's
safe to inject a page fault when we're in user mode, but it's not always
safe to do so in kernel mode.
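
In pseudo-Xen-C, a minimal sketch of the two checks might look like this
(illustrative only - the function name and placement are not actual Xen
code):

static bool_t injection_criteria_met(struct vcpu *v, uint64_t target_cr3)
{
    struct segment_register ss;

    /* 1. Only the monitored process' address space qualifies. */
    if ( v->arch.hvm_vcpu.guest_cr[3] != target_cr3 )
        return 0;

    /* 2. Only inject while the guest executes in user mode (CPL == 3). */
    hvm_get_segment_register(v, x86_seg_ss, &ss);
    return ss.attr.fields.dpl == 3;
}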

The current interface seems to be a low-level, basic wrapper around
hvm_inject_trap().

What we're trying to do is ask for a page fault when we're both A) in
usermode, and B) when a target process matches - and are only interested
in page faults, no other trap vector.

Technically, the checks could, probably, be moved into (and new
parameters added to) hvm_inject_trap() & friends, but it seems unlikely
that users other than memory introspection applications would be
interested in them, while they would possibly add to the complexity of
the interface. The rest of the clients would have to carry dummy
parameters around to use it.
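
To illustrate, the extended prototype would have to become something like
(hypothetical, not a proposed signature):

void hvm_inject_trap(struct hvm_trap *trap,
                     bool_t only_in_usermode, /* dummy for most callers */
                     uint64_t only_with_cr3); /* dummy for most callers */

with every existing caller passing 0 for the two new qualifiers.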


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-01 11:54                 ` Razvan Cojocaru
@ 2014-09-01 12:05                   ` Jan Beulich
  2014-09-02  9:18                     ` Razvan Cojocaru
  0 siblings, 1 reply; 35+ messages in thread
From: Jan Beulich @ 2014-09-01 12:05 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 01.09.14 at 13:54, <rcojocaru@bitdefender.com> wrote:
> On 09/01/2014 12:08 PM, Jan Beulich wrote:
>>>>> On 01.09.14 at 09:36, <rcojocaru@bitdefender.com> wrote:
>>> On 08/29/2014 12:27 PM, Jan Beulich wrote:
>>>>>>> On 29.08.14 at 09:44, <rcojocaru@bitdefender.com> wrote:
>>>>> I do understand the preference for a VCPU-based mechanism from a
>>>>> concurrency point of view, but that would simply potentially fail for
>>>>> us, hence defeating the purpose of the patch. I'm also not sure how that
>>>>> would be useful in the general case either, since the same problem that
>>>>> applies to us would seem to apply to the general case as well.
>>>>
>>>> Yeah, the whole thing probably needs a bit more thinking so that the
>>>> interface doesn't end up being a BitDefender-special. Indeed together
>>>> with the address space qualification, the interface might not be very
>>>> useful when made vCPU-bound. And taking it a little further into the
>>>> "generic" direction, allowing this to only inject #PF doesn't make a
>>>> very nice interface either. Plus we already have HVMOP_inject_trap,
>>>> i.e. your first line of thinking (and eventual explaining as the
>>>> motivation for a patch) should be why that can't be used.
>>>
>>> I'd say that it's memory-introspection specific rather than 3rd-party
>>> vendor specific. Without this patch, memory-introspection support
>>> in general is impacted / less flexible, since there's no other way to
>>> bring swapped out pages back in.
>>>
>>> For all the reasons you've explained (at least as far as I understand
>>> it) there's not much room to go more generic - so maybe just renaming
>>> the libxc wrapper to something more specific (
>>> xc_domain_request_usermode_pagefault?) is the solution here?
>> 
>> Maybe, but only after you explained why the existing interface can
>> neither be used nor suitably extended.
> 
> As far as I understand the HVMOP_inject_trap interface, it is simply (in
> this case) a way to trigger the equivalent of hvm_inject_page_fault()
> from userspace (via libxc).
> 
> We need two additional checks:
> 
> 1. That CR3 matches, because the way the application works, we need to
> inject a page fault related to the address space of whatever process is
> interesting at the time, and
> 
> 2. That we're in usermode (the CPL check), because we know that it's
> safe to inject a page fault when we're in user mode, but it's not always
> safe to do so in kernel mode.
> 
> The current interface seems to be a low-level, basic wrapper around
> hvm_inject_trap().
> 
> What we're trying to do is ask for a page fault when we're both A) in
> usermode, and B) when a target process matches - and are only interested
> in page faults, no other trap vector.
> 
> Technically, the checks could, probably, be moved into (and new
> parameters added to) hvm_inject_trap() & friends, but it seems unlikely
> that users other than memory introspection applications would be
> interested in them, while they would possibly add to the complexity of
> the interface. The rest of the clients would have to carry dummy
> parameters around to use it.

I'm not sure: Injecting faults with certain restrictions (like in your
case CPL and CR3) does seem to make quite some sense in
general when the fault isn't intended to be terminal (of a process
or the entire VM). It's just that so far we used fault injection only
to indicate error conditions.

But as always - let's see if others are of differing opinion before
settling on either route.

Jan


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-01 12:05                   ` Jan Beulich
@ 2014-09-02  9:18                     ` Razvan Cojocaru
  2014-09-02  9:33                       ` Jan Beulich
  2014-09-02 13:24                       ` Tim Deegan
  0 siblings, 2 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-02  9:18 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 09/01/2014 03:05 PM, Jan Beulich wrote:
>>>> On 01.09.14 at 13:54, <rcojocaru@bitdefender.com> wrote:
>> On 09/01/2014 12:08 PM, Jan Beulich wrote:
>>>>>> On 01.09.14 at 09:36, <rcojocaru@bitdefender.com> wrote:
>>>> On 08/29/2014 12:27 PM, Jan Beulich wrote:
>>>>>>>> On 29.08.14 at 09:44, <rcojocaru@bitdefender.com> wrote:
>>>>>> I do understand the preference for a VCPU-based mechanism from a
>>>>>> concurrency point of view, but that would simply potentially fail for
>>>>>> us, hence defeating the purpose of the patch. I'm also not sure how that
>>>>>> would be useful in the general case either, since the same problem that
>>>>>> applies to us would seem to apply to the general case as well.
>>>>>
>>>>> Yeah, the whole thing probably needs a bit more thinking so that the
>>>>> interface doesn't end up being a BitDefender-special. Indeed together
>>>>> with the address space qualification, the interface might not be very
>>>>> useful when made vCPU-bound. And taking it a little further into the
>>>>> "generic" direction, allowing this to only inject #PF doesn't make a
>>>>> very nice interface either. Plus we already have HVMOP_inject_trap,
>>>>> i.e. your first line of thinking (and eventual explaining as the
>>>>> motivation for a patch) should be why that can't be used.
>>>>
>>>> I'd say that it's memory-introspection specific rather than 3rd-party
>>>> vendor specific. Without this patch, memory-introspection support
>>>> in general is impacted / less flexible, since there's no other way to
>>>> bring swapped out pages back in.
>>>>
>>>> For all the reasons you've explained (at least as far as I understand
>>>> it) there's not much room to go more generic - so maybe just renaming
>>>> the libxc wrapper to something more specific (
>>>> xc_domain_request_usermode_pagefault?) is the solution here?
>>>
>>> Maybe, but only after you explained why the existing interface can
>>> neither be used nor suitably extended.
>>
>> As far as I understand the HVMOP_inject_trap interface, it is simply (in
>> this case) a way to trigger the equivalent of hvm_inject_page_fault()
>> from userspace (via libxc).
>>
>> We need two additional checks:
>>
>> 1. That CR3 matches, because the way the application works, we need to
>> inject a page fault related to the address space of whatever process is
>> interesting at the time, and
>>
>> 2. That we're in usermode (the CPL check), because we know that it's
>> safe to inject a page fault when we're in user mode, but it's not always
>> safe to do so in kernel mode.
>>
>> The current interface seems to be a low-level, basic wrapper around
>> hvm_inject_trap().
>>
>> What we're trying to do is ask for a page fault when we're both A) in
>> usermode, and B) when a target process matches - and are only interested
>> in page faults, no other trap vector.
>>
>> Technically, the checks could, probably, be moved into (and new
>> parameters added to) hvm_inject_trap() & friends, but it seems unlikely
>> that users other than memory introspection applications would be
>> interested in them, while they would possibly add to the complexity of
>> the interface. The rest of the clients would have to carry dummy
>> parameters around to use it.
> 
> I'm not sure: Injecting faults with certain restrictions (like in your
> case CPL and CR3) does seem to make quite some sense in
> general when the fault isn't intended to be terminal (of a process
> or the entire VM). It's just that so far we used fault injection only
> to indicate error conditions.
> 
> But as always - let's see if others are of differing opinion before
> settling on either route.

While waiting for other opinions, I've started looking at the
HVMOP_inject_trap code, and it uses a per-VCPU data-saving model similar
to the one we've been discussing (which, as agreed, does not work for
the use cases explained above).

The first giveaway is the libxc function:

/*
 * Injects a hardware/software CPU trap, to take effect the next time the
 * HVM resumes.
 */
int xc_hvm_inject_trap(
    xc_interface *xch, domid_t dom, int vcpu, uint32_t vector,
    uint32_t type, uint32_t error_code, uint32_t insn_len,
    uint64_t cr2);

which takes an "int vcpu" parameter. Then, this happens in
xen/arch/x86/hvm/hvm.c:

    case HVMOP_inject_trap:
    {
        xen_hvm_inject_trap_t tr;
        struct domain *d;
        struct vcpu *v;

        if ( copy_from_guest(&tr, arg, 1 ) )
            return -EFAULT;

        rc = rcu_lock_remote_domain_by_id(tr.domid, &d);
        if ( rc != 0 )
            return rc;

        rc = -EINVAL;
        if ( !is_hvm_domain(d) )
            goto param_fail8;

        rc = xsm_hvm_control(XSM_DM_PRIV, d, op);
        if ( rc )
            goto param_fail8;

        rc = -ENOENT;
        if ( tr.vcpuid >= d->max_vcpus || (v = d->vcpu[tr.vcpuid]) == NULL )
            goto param_fail8;

        if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
            rc = -EBUSY;
        else
        {
            v->arch.hvm_vcpu.inject_trap.vector = tr.vector;
            v->arch.hvm_vcpu.inject_trap.type = tr.type;
            v->arch.hvm_vcpu.inject_trap.error_code = tr.error_code;
            v->arch.hvm_vcpu.inject_trap.insn_len = tr.insn_len;
            v->arch.hvm_vcpu.inject_trap.cr2 = tr.cr2;
            rc = 0;
        }

    param_fail8:
        rcu_unlock_domain(d);
        break;
    }

The "v->arch.hvm_vcpu.inject_trap.<member> = initializer;" code again
points to a per-VCPU implementation.

And finally, hvm_do_resume() is also VCPU-dependent:

void hvm_do_resume(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct hvm_ioreq_server *s;

    check_wakeup_from_wait();

    if ( is_hvm_vcpu(v) )
        pt_restore_timer(v);

    list_for_each_entry ( s,
                          &d->arch.hvm_domain.ioreq_server.list,
                          list_entry )
    {
        struct hvm_ioreq_vcpu *sv;

        list_for_each_entry ( sv,
                              &s->ioreq_vcpu_list,
                              list_entry )
        {
            if ( sv->vcpu == v )
            {
                if ( !hvm_wait_for_io(sv, get_ioreq(s, v)) )
                    return;

                break;
            }
        }
    }

    /* Inject pending hw/sw trap */
    if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
    {
        hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
        v->arch.hvm_vcpu.inject_trap.vector = -1;
    }
}

While we need to set the data per-domain and have whatever VCPU inject
the page fault - _but_only_if_ it's in usermode and its CR3 points to
something interesting.

Hope I've been following the code correctly.


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-02  9:18                     ` Razvan Cojocaru
@ 2014-09-02  9:33                       ` Jan Beulich
  2014-09-02  9:44                         ` Razvan Cojocaru
  2014-09-02 13:24                       ` Tim Deegan
  1 sibling, 1 reply; 35+ messages in thread
From: Jan Beulich @ 2014-09-02  9:33 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 02.09.14 at 11:18, <rcojocaru@bitdefender.com> wrote:
> While we need to set the data per-domain and have whatever VCPU inject
> the page fault - _but_only_if_ it's in usermode and its CR3 points to
> something interesting.

Right - but none of this is an argument against adding a wildcard
specifier for the vCPU passed in the existing interface and -
assuming this is a tools-only interface - adding the additional
qualifiers (and perhaps even making the code obey them when used in a
vCPU-centric invocation).

Jan


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-02  9:33                       ` Jan Beulich
@ 2014-09-02  9:44                         ` Razvan Cojocaru
  2014-09-02 10:08                           ` Jan Beulich
  0 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-02  9:44 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, eddie.dong, stefano.stabellini, andrew.cooper3, tim,
	jun.nakajima, xen-devel, ian.jackson, ian.campbell

On 09/02/2014 12:33 PM, Jan Beulich wrote:
>>>> On 02.09.14 at 11:18, <rcojocaru@bitdefender.com> wrote:
>> While we need to set the data per-domain and have whatever VCPU inject
>> the page fault - _but_only_if_ it's in usermode and its CR3 points to
>> something interesting.
> 
> Right - but none of this is an argument against adding a wildcard
> specifier for the vCPU passed in the existing interface and -
> assuming this is a tools-only interface - adding the additional
> qualifiers (and perhaps even making the code obey them when used in a
> vCPU-centric invocation).

But adding a wildcard for the VCPU would effectively mean that this part
of the HVMOP_inject_trap case handling code would need to be modified to
use per-domain data instead of per-VCPU data (which with a wildcard
would not make sense):

        rc = -ENOENT;
        if ( tr.vcpuid >= d->max_vcpus || (v = d->vcpu[tr.vcpuid]) == NULL )
            goto param_fail8;

        if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
            rc = -EBUSY;
        else
        {
            v->arch.hvm_vcpu.inject_trap.vector = tr.vector;
            v->arch.hvm_vcpu.inject_trap.type = tr.type;
            v->arch.hvm_vcpu.inject_trap.error_code = tr.error_code;
            v->arch.hvm_vcpu.inject_trap.insn_len = tr.insn_len;
            v->arch.hvm_vcpu.inject_trap.cr2 = tr.cr2;
            rc = 0;
        }

The alternative would be to set the data to _all_ of the VCPUs for the
wildcard case, but that could potentially trigger a page fault for every
VCPU.

While I don't have any technical arguments against using a
wildcard for the VCPU (-1 looks like a likely candidate) and adding a
bool_t usermode and an unsigned long cr3 parameter / member, those
changes might somewhat warp the design of HVMOP_inject_trap.


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-02  9:44                         ` Razvan Cojocaru
@ 2014-09-02 10:08                           ` Jan Beulich
  0 siblings, 0 replies; 35+ messages in thread
From: Jan Beulich @ 2014-09-02 10:08 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, tim, jun.nakajima, xen-devel, ian.jackson

>>> On 02.09.14 at 11:44, <rcojocaru@bitdefender.com> wrote:
> On 09/02/2014 12:33 PM, Jan Beulich wrote:
>>>>> On 02.09.14 at 11:18, <rcojocaru@bitdefender.com> wrote:
>>> While we need to set the data per-domain and have whatever VCPU inject
>>> the page fault - _but_only_if_ it's in usermode and its CR3 points to
>>> something interesting.
>> 
>> Right - but none of this is an argument against adding a wildcard
>> specifier for the vCPU passed in the existing interface and -
>> assuming this is a tools-only interface - adding the additional
>> qualifiers (and perhaps even making the code obey them when used in a
>> vCPU-centric invocation).
> 
> But adding a wildcard for the VCPU would effectively mean that this part
> of the HVMOP_inject_trap case handling code would need to be modified to
> use per-domain data instead of per-VCPU data (which with a wildcard
> would not make sense):
> 
>         rc = -ENOENT;
>         if ( tr.vcpuid >= d->max_vcpus || (v = d->vcpu[tr.vcpuid]) == NULL )
>             goto param_fail8;
> 
>         if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
>             rc = -EBUSY;
>         else
>         {
>             v->arch.hvm_vcpu.inject_trap.vector = tr.vector;
>             v->arch.hvm_vcpu.inject_trap.type = tr.type;
>             v->arch.hvm_vcpu.inject_trap.error_code = tr.error_code;
>             v->arch.hvm_vcpu.inject_trap.insn_len = tr.insn_len;
>             v->arch.hvm_vcpu.inject_trap.cr2 = tr.cr2;
>             rc = 0;
>         }
> 
> The alternative would be to set the data to _all_ of the VCPUs for the
> wildcard case, but that could potentially trigger a page fault for every
> VCPU.

No - in addition to having a per-vCPU inject_trap field, you'd also
add a per-domain one.
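
Roughly, as a sketch (the per-domain field and the qualifier check are
invented names, not existing code):

    /* In hvm_do_resume(), next to the existing per-vCPU check: */
    if ( v->arch.hvm_vcpu.inject_trap.vector == -1 &&
         d->arch.hvm_domain.inject_trap.vector != -1 &&
         trap_qualifiers_met(v) /* hypothetical CPL/CR3 check */ )
    {
        hvm_inject_trap(&d->arch.hvm_domain.inject_trap);
        d->arch.hvm_domain.inject_trap.vector = -1;
    }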

Jan


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-02  9:18                     ` Razvan Cojocaru
  2014-09-02  9:33                       ` Jan Beulich
@ 2014-09-02 13:24                       ` Tim Deegan
  2014-09-09 16:57                         ` George Dunlap
  1 sibling, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2014-09-02 13:24 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, jun.nakajima,
	andrew.cooper3, eddie.dong, Jan Beulich, xen-devel, ian.jackson

Hi,

At 12:18 +0300 on 02 Sep (1409656686), Razvan Cojocaru wrote:
> While we need to set the data per-domain and have whatever VCPU inject
> the page fault - _but_only_if_ it's in usermode and its CR3 points to
> something interesting.

That's a strange and specific thing to ask the hypervisor to do for
you.  Given that you can already trap CR3 changes as mem-events, can
you trigger the fault injection in response to the context switch?
I guess that would probably catch it in kernel mode. :(

It also seems like it will be hard to do reliably -- HVM guests don't
trap on user<->kernel transitions so we're not guaranteed to be able
to stop in the right place.

I wonder if there's some trick you can play by tinkering with other
memory permissions to trigger an event at the right time (e.g. find
the process you want and walk its stack to find a user-space return
address and then mark it read-only?)

Tim.


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-02 13:24                       ` Tim Deegan
@ 2014-09-09 16:57                         ` George Dunlap
  2014-09-09 17:39                           ` Razvan Cojocaru
  2014-09-09 20:14                           ` Tim Deegan
  0 siblings, 2 replies; 35+ messages in thread
From: George Dunlap @ 2014-09-09 16:57 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Tian, Kevin, Ian Campbell, Razvan Cojocaru, Stefano Stabellini,
	Andrew Cooper, Dong, Eddie, Jan Beulich, Jun Nakajima, xen-devel,
	Ian Jackson

On Tue, Sep 2, 2014 at 2:24 PM, Tim Deegan <tim@xen.org> wrote:
> Hi,
>
> At 12:18 +0300 on 02 Sep (1409656686), Razvan Cojocaru wrote:
>> While we need to set the data per-domain and have whatever VCPU inject
>> the page fault - _but_only_if_ it's in usermode and its CR3 points to
>> something interesting.
>
> That's a strange and specific thing to ask the hypervisor to do for
> you.  Given that you can already trap CR3 changes as mem-events, can
> you trigger the fault injection in response to the context switch?
> I guess that would probably catch it in kernel mode. :(

I was wondering, rather than special-casing inject_trap, would it make
sense for the memory controller to be able to get notifications when
certain more complex conditions happen (e.g., "some vcpu is in user
mode with this CR3")?  Then the controller could ask to be notified
when the event happens, and when it does, just call inject_fault.

That way, inject_fault isn't special-cased at all; and one could
imagine designing the "condition" such that any number of interesting
conditions could be trapped.
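
(Purely as a sketch - nothing like this exists today - the request might
carry a small condition descriptor:

    struct mem_event_condition {   /* hypothetical */
        uint32_t type;             /* e.g. "in user mode with this CR3" */
        uint64_t cr3;              /* address space to match */
    };

and the hypervisor would send a mem_event once it observes a vcpu
satisfying it.)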

Thoughts?

But ultimately, as Tim said, you're basically just *hoping* that it
won't take too long to happen to be at the hypervisor when the proper
condition happens.  If the process in question isn't getting many
interrupts, or is spending the vast majority of its time in the
kernel, you may end up waiting an unbounded amount of time to be able
to "catch" it in user mode.  It seems like it would be better to find
a reliable way to trap on the return into user mode, in which case you
wouldn't need to have a special "wait for this complicated event to
happen" call at all, would you?

 -George


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-09 16:57                         ` George Dunlap
@ 2014-09-09 17:39                           ` Razvan Cojocaru
  2014-09-09 18:38                             ` Tamas K Lengyel
  2014-09-09 20:14                           ` Tim Deegan
  1 sibling, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-09 17:39 UTC (permalink / raw)
  To: George Dunlap, Tim Deegan
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Andrew Cooper, Dong, Eddie, Jan Beulich, xen-devel, Ian Jackson

On 09/09/14 19:57, George Dunlap wrote:
> On Tue, Sep 2, 2014 at 2:24 PM, Tim Deegan <tim@xen.org> wrote:
>> Hi,
>>
>> At 12:18 +0300 on 02 Sep (1409656686), Razvan Cojocaru wrote:
>>> While we need to set the data per-domain and have whatever VCPU inject
>>> the page fault - _but_only_if_ it's in usermode and its CR3 points to
>>> something interesting.
>>
>> That's a strange and specific thing to ask the hypervisor to do for
>> you.  Given that you can already trap CR3 changes as mem-events, can
>> you trigger the fault injection in response to the context switch?
>> I guess that would probably catch it in kernel mode. :(
> 
> I was wondering, rather than special-casing inject_trap, would it make
> sense for the memory controller to be able to get notifications when
> certain more complex conditions happen (e.g., "some vcpu is in user
> mode with this CR3")?  Then the controller could ask to be notified
> when the event happens, and when it does, just call inject_fault.
> 
> That way, inject_fault isn't special-cased at all; and one could
> imagine designing the "condition" such that any number of interesting
> conditions could be trapped.
> 
> Thoughts?

Are you talking about more complex mem_event-sending conditions? That's
certainly interesting. For now, we do have CR3 events; however, waiting
for all CR3-change events is prohibitive in our case, because of the
frequent dom0 <-> HV switches incurred (which slow down the monitored
guest quite visibly).

> But ultimately, as Tim said, you're basically just *hoping* that it
> won't take too long to happen to be at the hypervisor when the proper
> condition happens.  If the process in question isn't getting many
> interrupts, or is spending the vast majority of its time in the
> kernel, you may end up waiting an unbounded amount of time to be able
> to "catch" it in user mode.  It seems like it would be better to find
> a reliable way to trap on the return into user mode, in which case you
> wouldn't need to have a special "wait for this complicated event to
> happen" call at all, would you?

Indeed, but it is assumed that the trap injection request is being made
by the caller in the proper context (when it knows that the condition
will be true sooner rather than later).


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-09 17:39                           ` Razvan Cojocaru
@ 2014-09-09 18:38                             ` Tamas K Lengyel
  2014-09-10  8:09                               ` Razvan Cojocaru
  0 siblings, 1 reply; 35+ messages in thread
From: Tamas K Lengyel @ 2014-09-09 18:38 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	George Dunlap, Tim Deegan, Jan Beulich, Dong, Eddie,
	Jun Nakajima, xen-devel, Ian Jackson



> > But ultimately, as Tim said, you're basically just *hoping* that it
> > won't take too long to happen to be at the hypervisor when the proper
> > condition happens.  If the process in question isn't getting many
> > interrupts, or is spending the vast majority of its time in the
> > kernel, you may end up waiting an unbounded amount of time to be able
> > to "catch" it in user mode.  It seems like it would be better to find
> > a reliable way to trap on the return into user mode, in which case you
> > wouldn't need to have a special "wait for this complicated event to
> > happen" call at all, would you?
>
> Indeed, but it is assumed that the trap injection request is being made
> by the caller in the proper context (when it knows that the condition
> will be true sooner rather than later).
>

How is it known that the condition will be true soon? Some more information
on what you consider 'proper context' would be valuable.

One solution I have used in the past is to just single-step the target
application with MTF until it enters user mode. It does have a heavy
overhead while the target application is scheduled. How often you need
to catch the target process in usermode after a context switch
determines whether this is a prohibitive performance overhead or not;
in my case it was acceptable.

An alternative I was contemplating is to look at the stack of the
target application and inject an INT3 at the IP found in the trapframe.
That approach has the least overhead, as the trap only triggers when
the CPL has just switched to user mode. However, it requires some
OS-specific knowledge of what the trapframe on the stack looks like,
which may require some level of trust in the guest kernel. Depending on
the use case, it may also require some extra EPT protection to hide the
presence of the trap from the OS or other in-guest code. So there are
alternatives already; it's just a question of how fast and how reliable
they are.
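
Very roughly, the INT3 variant would be (all helpers and offsets here are
hypothetical, since the trapframe layout is OS-specific):

    /* Locate the user-mode return address in the trapframe and plant
     * a breakpoint there; the guest memory helpers are made up. */
    uint64_t rip = read_guest_qword(domid, trapframe_gva + RIP_OFFSET);
    uint8_t saved, int3 = 0xCC;

    read_guest_bytes(domid, rip, &saved, 1);  /* keep the original byte */
    write_guest_bytes(domid, rip, &int3, 1);  /* plant INT3 */
    /* On the resulting breakpoint mem_event: restore 'saved', then act,
     * now that the vCPU is back in user mode. */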

Tamas



* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-09 16:57                         ` George Dunlap
  2014-09-09 17:39                           ` Razvan Cojocaru
@ 2014-09-09 20:14                           ` Tim Deegan
  2014-09-10  9:30                             ` Razvan Cojocaru
  1 sibling, 1 reply; 35+ messages in thread
From: Tim Deegan @ 2014-09-09 20:14 UTC (permalink / raw)
  To: George Dunlap
  Cc: Tian, Kevin, Ian Campbell, Razvan Cojocaru, Stefano Stabellini,
	Andrew Cooper, Dong, Eddie, Jan Beulich, Jun Nakajima, xen-devel,
	Ian Jackson

At 17:57 +0100 on 09 Sep (1410281829), George Dunlap wrote:
> On Tue, Sep 2, 2014 at 2:24 PM, Tim Deegan <tim@xen.org> wrote:
> > Hi,
> >
> > At 12:18 +0300 on 02 Sep (1409656686), Razvan Cojocaru wrote:
> >> While we need to set the data per-domain and have whatever VCPU inject
> >> the page fault - _but_only_if_ it's in usermode and its CR3 points to
> >> something interesting.
> >
> > That's a strange and specific thing to ask the hypervisor to do for
> > you.  Given that you can already trap CR3 changes as mem-events, can
> > you trigger the fault injection in response to the context switch?
> > I guess that would probably catch it in kernel mode. :(
> 
> I was wondering, rather than special-casing inject_trap, would it make
> sense for the memory controller to be able to get notifications when
> certain more complex conditions happen (e.g., "some vcpu is in user
> mode with this CR3")?  Then the controller could ask to be notified
> when the event happens, and when it does, just call inject_fault.

Yes, that sounds like a better place to put this kind of test.  As
part of the mem_event trigger framework it doesn't seem nearly so out
of place (and it avoids many of the problems of clashes between
different event injection paths).

> That way, inject_fault isn't special-cased at all; and one could
> imagine designing the "condition" such that any number of interesting
> conditions could be trapped.
> 
> Thoughts?
> 
> But ultimately, as Tim said, you're basically just *hoping* that it
> won't take too long to happen to be at the hypervisor when the proper
> condition happens.  If the process in question isn't getting many
> interrupts, or is spending the vast majority of its time in the
> kernel, you may end up waiting an unbounded amount of time to be able
> to "catch" it in user mode.  It seems like it would be better to find
> a reliable way to trap on the return into user mode, in which case you
> wouldn't need to have a special "wait for this complicated event to
> happen" call at all, would you?

Yeah; I was thinking about page-protecting the process's stack as an
approach to this.  Breakpointing the return address might work too but
would probably cause more false alarms -- you'd at least need to walk
up past the libc/win32 wrappers to avoid trapping every thread.

Ideally there'd be something vcpu-specific we could tinker with
(e.g. arranging MSRs so that SYSRET will fault) once we see the right
CR3 (assuming intercepting CR3 is cheap enough in this case).

Tim.


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-09 18:38                             ` Tamas K Lengyel
@ 2014-09-10  8:09                               ` Razvan Cojocaru
  2014-09-10  8:48                                 ` Andrew Cooper
  0 siblings, 1 reply; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-10  8:09 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Andrew Cooper, George Dunlap, Tim Deegan, Dong, Eddie,
	Jan Beulich, xen-devel, Ian Jackson

On 09/09/2014 09:38 PM, Tamas K Lengyel wrote:
> 
>     > But ultimately, as Tim said, you're basically just *hoping* that it
>     > won't take too long to happen to be at the hypervisor when the proper
>     > condition happens.  If the process in question isn't getting many
>     > interrupts, or is spending the vast majority of its time in the
>     > kernel, you may end up waiting an unbounded amount of time to be able
>     > to "catch" it in user mode.  It seems like it would be better to find
>     > a reliable way to trap on the return into user mode, in which case you
>     > wouldn't need to have a special "wait for this complicated event to
>     > happen" call at all, would you?
> 
>     Indeed, but it is assumed that the trap injection request is being made
>     by the caller in the proper context (when it knows that the condition
>     will be true sooner rather than later).
> 
> 
> How is it known that the condition will be true soon? Some more
> information on what you consider 'proper context' would be valuable.

It's actually pretty simple for us: the application always requests an
injection when the guest is already in the address space of the
interesting application, and in user mode.


Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-10  8:09                               ` Razvan Cojocaru
@ 2014-09-10  8:48                                 ` Andrew Cooper
  2014-09-10  8:55                                   ` Razvan Cojocaru
  0 siblings, 1 reply; 35+ messages in thread
From: Andrew Cooper @ 2014-09-10  8:48 UTC (permalink / raw)
  To: Razvan Cojocaru, Tamas K Lengyel
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Dong, Eddie, George Dunlap, Tim Deegan, Jan Beulich, xen-devel,
	Ian Jackson

On 10/09/2014 09:09, Razvan Cojocaru wrote:
> On 09/09/2014 09:38 PM, Tamas K Lengyel wrote:
>>     > But ultimately, as Tim said, you're basically just *hoping* that it
>>     > won't take too long to happen to be at the hypervisor when the proper
>>     > condition happens.  If the process in question isn't getting many
>>     > interrupts, or is spending the vast majority of its time in the
>>     > kernel, you may end up waiting an unbounded amount of time to be able
>>     > to "catch" it in user mode.  It seems like it would be better to find
>>     > a reliable way to trap on the return into user mode, in which case you
>>     > wouldn't need to have a special "wait for this complicated event to
>>     > happen" call at all, would you?
>>
>>     Indeed, but it is assumed that the trap injection request is being made
>>     by the caller in the proper context (when it knows that the condition
>>     will be true sooner rather than later).
>>
>>
>> How is it known that the condition will be true soon? Some more
>> information on what you consider 'proper context' would be valuable.
> It's actually pretty simple for us: the application always requests an
> injection when the guest is already in the address space of the
> interesting application, and in user mode.

Does this mean that you always request a pagefault as a direct result of
a mem_event, when the vcpu is blocked in the correct context?

If so, how about extending the mem_event response mechanism with
trap/fault information?
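
E.g. (sketch only - the added field names are invented):

    typedef struct mem_event_st {
        /* ... existing fields ... */
        uint32_t inject_vector;     /* e.g. TRAP_page_fault, -1 = none */
        uint32_t inject_error_code;
        uint64_t inject_cr2;
    } mem_event_response_t;

so the reply that resumes the vcpu can carry the injection with it.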

~Andrew


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-10  8:48                                 ` Andrew Cooper
@ 2014-09-10  8:55                                   ` Razvan Cojocaru
  2014-09-10  9:34                                     ` Andrew Cooper
  2014-09-10 10:39                                     ` George Dunlap
  0 siblings, 2 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-10  8:55 UTC (permalink / raw)
  To: Andrew Cooper, Tamas K Lengyel
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Dong, Eddie, George Dunlap, Tim Deegan, Jan Beulich, xen-devel,
	Ian Jackson

On 09/10/2014 11:48 AM, Andrew Cooper wrote:
> On 10/09/2014 09:09, Razvan Cojocaru wrote:
>> On 09/09/2014 09:38 PM, Tamas K Lengyel wrote:
>>>     > But ultimately, as Tim said, you're basically just *hoping* that it
>>>     > won't take too long to happen to be at the hypervisor when the proper
>>>     > condition happens.  If the process in question isn't getting many
>>>     > interrupts, or is spending the vast majority of its time in the
>>>     > kernel, you may end up waiting an unbounded amount of time to be able
>>>     > to "catch" it in user mode.  It seems like it would be better to find
>>>     > a reliable way to trap on the return into user mode, in which case you
>>>     > wouldn't need to have a special "wait for this complicated event to
>>>     > happen" call at all, would you?
>>>
>>>     Indeed, but it is assumed that the trap injection request is being made
>>>     by the caller in the proper context (when it knows that the condition
>>>     will be true sooner rather than later).
>>>
>>>
>>> How is it known that the condition will be true soon? Some more
>>> information on what you consider 'proper context' would be valuable.
>> It's actually pretty simple for us: the application always requests an
>> injection when the guest is already in the address space of the
>> interesting application, and in user mode.
> 
> Does this mean that you always request a pagefault as a direct result of
> a mem_event, when the vcpu is blocked in the correct context?

Yes, exactly.

> If so, how about extending the mem_event response mechanism with
> trap/fault information?

For this particular case, that is indeed a very good suggestion -
however, things may change. From what I understand, it is likely that in
the future we (or somebody else doing memory introspection) will need to
request a page fault injection in other cases. The risks described above
will of course exist in that case, but they are acceptable.


Thanks,
Razvan Cojocaru


* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-09 20:14                           ` Tim Deegan
@ 2014-09-10  9:30                             ` Razvan Cojocaru
  2014-09-10  9:59                               ` Tamas K Lengyel
  2014-09-10 10:44                               ` Tim Deegan
  0 siblings, 2 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-10  9:30 UTC (permalink / raw)
  To: Tim Deegan, George Dunlap
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Andrew Cooper, Dong, Eddie, Jan Beulich, xen-devel, Ian Jackson

On 09/09/2014 11:14 PM, Tim Deegan wrote:
> At 17:57 +0100 on 09 Sep (1410281829), George Dunlap wrote:
>> On Tue, Sep 2, 2014 at 2:24 PM, Tim Deegan <tim@xen.org> wrote:
>>> Hi,
>>>
>>> At 12:18 +0300 on 02 Sep (1409656686), Razvan Cojocaru wrote:
>>>> While we need to set the data per-domain and have whatever VCPU inject
>>>> the page fault - _but_only_if_ it's in usermode and its CR3 points to
>>>> something interesting.
>>>
>>> That's a strange and specific thing to ask the hypervisor to do for
>>> you.  Given that you can already trap CR3 changes as mem-events, can
>>> you trigger the fault injection in response to the context switch?
>>> I guess that would probably catch it in kernel mode. :(
>>
>> I was wondering, rather than special-casing inject_trap, would it make
>> sense to be able for the memory controller to get notifications when
>> certain more complex conditions happen (e.g., "some vcpu is in user
>> mode with this CR3")?  Then the controller could ask to be notified
>> when the event happens, and when it does, just call inject_fault.
> 
> Yes, that sounds like a better place to put this kind of test.  As
> part of the mem_event trigger framework it doesn't seem nearly so out
> of place (and it avoids many of the problems of clashes between
> different event injection paths).

Do you mean someplace here (hvm.c)?

int hvm_set_cr3(unsigned long value)
{
    struct vcpu *v = current;
    struct page_info *page;
    unsigned long old;

    if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
         (value != v->arch.hvm_vcpu.guest_cr[3]) )
    {
        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
        page = get_page_from_gfn(v->domain, value >> PAGE_SHIFT,
                                 NULL, P2M_ALLOC);
        if ( !page )
            goto bad_cr3;

        put_page(pagetable_get_page(v->arch.guest_table));
        v->arch.guest_table = pagetable_from_page(page);

        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
    }

    old = v->arch.hvm_vcpu.guest_cr[3];
    v->arch.hvm_vcpu.guest_cr[3] = value;
    paging_update_cr3(v);
    hvm_memory_event_cr3(value, old);
    return X86EMUL_OKAY;

 bad_cr3:
    gdprintk(XENLOG_ERR, "Invalid CR3\n");
    domain_crash(v->domain);
    return X86EMUL_UNHANDLEABLE;
}

Alongside hvm_memory_event_cr3(value, old), have another function that
checks an array of CR3s and, if v is in user mode, sends out an event?
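
Purely as a sketch of what I mean (none of these helpers or fields
exist; the watched-CR3 list and the event function are invented for
illustration):

static void hvm_memory_event_watched_cr3(struct vcpu *v, unsigned long value)
{
    struct segment_register ss;
    unsigned int i;

    /* CPL 3 <=> user mode. */
    hvm_get_segment_register(v, x86_seg_ss, &ss);
    if ( ss.attr.fields.dpl != 3 )
        return;

    /* watched_cr3[] / nr_watched_cr3 are hypothetical per-domain fields. */
    for ( i = 0; i < v->domain->nr_watched_cr3; i++ )
        if ( v->domain->watched_cr3[i] == value )
        {
            send_watched_cr3_event(v, value); /* hypothetical notifier */
            break;
        }
}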

As I've explained in my earlier reply to Tamas, the way we use the
injection request hypercall now, the conditions for immediate injection
normally already hold.

Also, I'm not sure the "is it user mode?" check would reliably work at
the time of calling hvm_set_cr3(). Are CR3 loads not always happening in
kernel-mode?

>> That way, inject_fault isn't special-cased at all; and one could
>> imagine designing the "condition" such that any number of interesting
>> conditions could be trapped.
>>
>> Thoughts?
>>
>> But ultimately, as Tim said, you're basically just *hoping* that it
>> won't take too long to happen to be at the hypervisor when the proper
>> condition happens.  If the process in question isn't getting many
>> interrupts, or is spending the vast majority of its time in the
>> kernel, you may end up waiting an unbounded amount of time to be able
>> to "catch" it in user mode.  It seems like it would be better to find
>> a reliable way to trap on the return into user mode, in which case you
>> wouldn't need to have a special "wait for this complicated event to
>> happen" call at all, would you?
> 
> Yeah; I was thinking about page-protecting the process's stack as an
> approach to this.  Breakpointing the return address might work too but
> would probably cause more false alarms -- you'd at least need to walk
> up past the libc/win32 wrappers to avoid trapping every thread.
> 
> Ideally there'd be something vcpu-specific we could tinker with
> (e.g. arranging MSRs so that SYSRET will fault) once we see the right
> CR3 (assuming intercepting CR3 is cheap enough in this case).

All valid suggestions; however, they would seem to have a greater impact
on guest responsiveness. There would be quite a lot of CR3 loads and
SYSRETs.


Thanks,
Razvan Cojocaru

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-10  8:55                                   ` Razvan Cojocaru
@ 2014-09-10  9:34                                     ` Andrew Cooper
  2014-09-10 10:39                                     ` George Dunlap
  1 sibling, 0 replies; 35+ messages in thread
From: Andrew Cooper @ 2014-09-10  9:34 UTC (permalink / raw)
  To: Razvan Cojocaru, Tamas K Lengyel
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Dong, Eddie, George Dunlap, Tim Deegan, Jan Beulich, xen-devel,
	Ian Jackson

On 10/09/14 09:55, Razvan Cojocaru wrote:
> On 09/10/2014 11:48 AM, Andrew Cooper wrote:
>> On 10/09/2014 09:09, Razvan Cojocaru wrote:
>>> On 09/09/2014 09:38 PM, Tamas K Lengyel wrote:
>>>>     > But ultimately, as Tim said, you're basically just *hoping* that it
>>>>     > won't take too long to happen to be at the hypervisor when the proper
>>>>     > condition happens.  If the process in question isn't getting many
>>>>     > interrupts, or is spending the vast majority of its time in the
>>>>     > kernel, you may end up waiting an unbounded amount of time to be able
>>>>     > to "catch" it in user mode.  It seems like it would be better to find
>>>>     > a reliable way to trap on the return into user mode, in which case you
>>>>     > wouldn't need to have a special "wait for this complicated event to
>>>>     > happen" call at all, would you?
>>>>
>>>>     Indeed, but it is assumed that the trap injection request is being made
>>>>     by the caller in the proper context (when it knows that the condition
>>>>     will be true sooner rather than later).
>>>>
>>>>
>>>> How is it known that the condition will be true soon? Some more
>>>> information on what you consider 'proper context' would be valuable.
>>> It's actually pretty simple for us: the application always requests an
>>> injection when the guest is already in the address space of the
>>> interesting application, and in user mode.
>> Does this mean that you always request a pagefault as a direct result of
>> a mem_event, when the vcpu is blocked in the correct context?
> Yes, exactly.
>
>> If so, how about extending the mem_event response mechanism with
>> trap/fault information?
> For this particular case, that is indeed a very good suggestion -
> however, things may change. From what I understand, it is likely that in
> the future we (or somebody else doing memory introspection) will need to
> request a page fault injection in other cases. The risks described above
> will of course exist in that case, but they are acceptable.

Right.  I can see your concern, but designing an interface like this for
some hopeful future can be problematic, especially given only a vague
idea of how it would be used in practice.

With the Xen hypercall API/ABI, it is always possible to add something
in the future, and a concrete example of how it is supposed to work does
greatly help with justifying its design and implementation.

In this case, I feel that extending mem_event responses is a very
natural thing to do.  It very closely ties the pagefault to the action
which resulted in the decision for a pagefault, rather than an
apparently asynchronous pagefault request via another mechanism which
userspace has to use when it knows that the vcpu is blocked on a mem_event.

Furthermore, having a general "please inject a fault which looks like
this" mechanism allows the mem_event userspace agent algorithm to choose
to inject other faults for different circumstances.
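
As a rough illustration only -- the names are invented, this is not a
proposed ABI -- the response ring entry could grow optional injection
data:

typedef struct mem_event_st {
    uint32_t flags;          /* e.g. a new MEM_EVENT_FLAG_INJECT_FAULT */
    uint32_t vcpu_id;
    /* ... existing request/response fields ... */
    struct {
        uint32_t vector;     /* e.g. TRAP_page_fault */
        uint32_t error_code;
        uint64_t address;    /* %cr2 for a #PF */
    } inject;                /* hypothetical extension */
} mem_event_response_t;

The agent would set the flag and fill in 'inject' before putting the
response on the ring, and the hypervisor would queue the fault as it
resumes the vcpu.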

~Andrew

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-10  9:30                             ` Razvan Cojocaru
@ 2014-09-10  9:59                               ` Tamas K Lengyel
  2014-09-10 10:44                               ` Tim Deegan
  1 sibling, 0 replies; 35+ messages in thread
From: Tamas K Lengyel @ 2014-09-10  9:59 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Andrew Cooper,
	George Dunlap, Tim Deegan, Jan Beulich, Dong, Eddie,
	Jun Nakajima, xen-devel, Ian Jackson


> >> But ultimately, as Tim said, you're basically just *hoping* that it
> >> won't take too long to happen to be at the hypervisor when the proper
> >> condition happens.  If the process in question isn't getting many
> >> interrupts, or is spending the vast majority of its time in the
> >> kernel, you may end up waiting an unbounded amount of time to be able
> >> to "catch" it in user mode.  It seems like it would be better to find
> >> a reliable way to trap on the return into user mode, in which case you
> >> wouldn't need to have a special "wait for this complicated event to
> >> happen" call at all, would you?
> >
> > Yeah; I was thinking about page-protecting the process's stack as an
> > approach to this.  Breakpointing the return address might work too but
> > would probably cause more false alarms -- you'd at least need to walk
> > up past the libc/win32 wrappers to avoid trapping every thread.
> >
> > Ideally there'd be something vcpu-specific we could tinker with
> > (e.g. arranging MSRs so that SYSRET will fault) once we see the right
> > CR3 (assuming intercepting CR3 is cheap enough in this case).
>
> All valid suggestions; however, they would seem to have a greater impact
> on guest responsiveness. There would be quite a lot of CR3 loads and
> SYSRETs.
>

Another approach, which may be a bit less complicated than breakpointing
the return address, is to walk your target process' pagetables when it is
scheduled and simply mark all pages that are executable but don't have
the supervisor bit set as NX in the EPT. That way the kernel code can
happily finish and you get a mem-event on user-mode code execution right
away, without any false alarms (with the added benefit of this being
guest-OS agnostic as well). The only downside is that if you have
multiple vCPUs, you need to make sure the event came from the target
vCPU and do some more housekeeping to step any non-target faulting vCPU
over the trap.
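
A rough sketch of the marking step from dom0, assuming the gfns of the
process' user-mode executable pages were already collected by a
pagetable walk (not shown), and treating the exact xc_set_mem_access()
signature of this era as approximate:

#include <xenctrl.h>

/* Revoke execute on each gfn so the first user-mode fetch traps. */
static int revoke_user_exec(xc_interface *xch, domid_t dom,
                            const uint64_t *gfns, size_t nr)
{
    size_t i;

    for ( i = 0; i < nr; i++ )
        /* rw, no x: the next instruction fetch raises a mem-event */
        if ( xc_set_mem_access(xch, dom, XENMEM_access_rw, gfns[i], 1) )
            return -1;

    return 0;
}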

Tamas

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-10  8:55                                   ` Razvan Cojocaru
  2014-09-10  9:34                                     ` Andrew Cooper
@ 2014-09-10 10:39                                     ` George Dunlap
  2014-09-10 10:49                                       ` Razvan Cojocaru
  1 sibling, 1 reply; 35+ messages in thread
From: George Dunlap @ 2014-09-10 10:39 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Andrew Cooper, Tim Deegan, Dong, Eddie, Jan Beulich,
	Tamas K Lengyel, xen-devel, Ian Jackson

On Wed, Sep 10, 2014 at 9:55 AM, Razvan Cojocaru
<rcojocaru@bitdefender.com> wrote:
> On 09/10/2014 11:48 AM, Andrew Cooper wrote:
>> On 10/09/2014 09:09, Razvan Cojocaru wrote:
>>> On 09/09/2014 09:38 PM, Tamas K Lengyel wrote:
>>>>     > But ultimately, as Tim said, you're basically just *hoping* that it
>>>>     > won't take too long to happen to be at the hypervisor when the proper
>>>>     > condition happens.  If the process in question isn't getting many
>>>>     > interrupts, or is spending the vast majority of its time in the
>>>>     > kernel, you may end up waiting an unbounded amount of time to be able
>>>>     > to "catch" it in user mode.  It seems like it would be better to find
>>>>     > a reliable way to trap on the return into user mode, in which case you
>>>>     > wouldn't need to have a special "wait for this complicated event to
>>>>     > happen" call at all, would you?
>>>>
>>>>     Indeed, but it is assumed that the trap injection request is being made
>>>>     by the caller in the proper context (when it knows that the condition
>>>>     will be true sooner rather than later).
>>>>
>>>>
>>>> How is it known that the condition will be true soon? Some more
>>>> information on what you consider 'proper context' would be valuable.
>>> It's actually pretty simple for us: the application always requests an
>>> injection when the guest is already in the address space of the
>>> interesting application, and in user mode.
>>
>> Does this mean that you always request a pagefault as a direct result of
>>> a mem_event, when the vcpu is blocked in the correct context?
>
> Yes, exactly.
>
>> If so, how about extending the mem_event response mechanism with
>> trap/fault information?
>
> For this particular case, that is indeed a very good suggestion -
> however, things may change. From what I understand, it is likely that in
> the future we (or somebody else doing memory introspection) will need to
> request a page fault injection in other cases. The risks described above
> will of course exist in that case, but they are acceptable.

Sorry -- do you mean that you don't actually need this functionality
right now, but you think that maybe someone else might need it, or you
may need it in the future?  That doesn't sound very promising; at the
moment it sounds like you're not actually even testing this mechanism
to make sure that it works the way you hope it does.

I definitely think that the clean way to approach this is to try to
allow you to trap on a mem_event at the point where you want to inject
the page fault, and then allow the controller to inject the page fault
with the existing mechanisms.

If at the moment you can already do that reliably, then you don't need
any extra functionality.  (Although being able to say, "continue this
vcpu and inject a trap" might be useful functionality.)

If in the future you think you may not be able to, then we can try to
add mem_event notifications that can help you.

> Also, I'm not sure the "is it user mode?" check would reliably work at
> the time of calling hvm_set_cr3(). Are CR3 loads not always happening in
> kernel-mode?

Yes, cr3 writes happen in kernel mode, so just trapping cr3 accesses
won't get you what you need.  But you could:
1. Trap on a cr3 access *being set to a certain value*
2. Then trap on some other condition which would signal the return to
userspace (int3, a page-protection trap on the return address, &c)
3. Then inject the trap.

That would just be two round-trips to the introspector, which
shouldn't be all that bad.
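
In controller terms the flow might look like this; the struct and every
helper are placeholders for whatever the introspection agent actually
uses, and only the ordering matters:

struct agent {
    unsigned long target_cr3;     /* CR3 of the interesting process */
    unsigned long fault_address;  /* where we want the #PF */
};

/* Round trip 1: a CR3 mem_event reports the watched value was loaded. */
void on_cr3_event(struct agent *a, unsigned long new_cr3)
{
    if ( new_cr3 == a->target_cr3 )
        arm_user_return_trap(a);            /* int3, stack #PF, etc. */
}

/* Round trip 2: the armed trap fires once we're back in user mode. */
void on_return_trap(struct agent *a)
{
    disarm_user_return_trap(a);
    inject_page_fault(a, a->fault_address); /* existing inject mechanism */
}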

 -George

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-10  9:30                             ` Razvan Cojocaru
  2014-09-10  9:59                               ` Tamas K Lengyel
@ 2014-09-10 10:44                               ` Tim Deegan
  1 sibling, 0 replies; 35+ messages in thread
From: Tim Deegan @ 2014-09-10 10:44 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Andrew Cooper, George Dunlap, Dong, Eddie, Jan Beulich,
	xen-devel, Ian Jackson

At 12:30 +0300 on 10 Sep (1410348648), Razvan Cojocaru wrote:
> On 09/09/2014 11:14 PM, Tim Deegan wrote:
> > At 17:57 +0100 on 09 Sep (1410281829), George Dunlap wrote:
> >> On Tue, Sep 2, 2014 at 2:24 PM, Tim Deegan <tim@xen.org> wrote:
> >>> Hi,
> >>>
> >>> At 12:18 +0300 on 02 Sep (1409656686), Razvan Cojocaru wrote:
> >>>> While we need to set the data per-domain and have whatever VCPU inject
> >>>> the page fault - _but_only_if_ it's in usermode and its CR3 points to
> >>>> something interesting.
> >>>
> >>> That's a strange and specific thing to ask the hypervisor to do for
> >>> you.  Given that you can already trap CR3 changes as mem-events, can
> >>> you trigger the fault injection in response to the context switch?
> >>> I guess that would probably catch it in kernel mode. :(
> >>
> >> I was wondering, rather than special-casing inject_trap, would it make
> >> sense for the memory controller to be able to get notifications when
> >> certain more complex conditions happen (e.g., "some vcpu is in user
> >> mode with this CR3")?  Then the controller could ask to be notified
> >> when the event happens, and when it does, just call inject_fault.
> > 
> > Yes, that sounds like a better place to put this kind of test.  As
> > part of the mem_event trigger framework it doesn't seem nearly so out
> > of place (and it avoids many of the problems of clashes between
> > different event injection paths).
> 
> Do you mean someplace here (hvm.c)?
> 
> int hvm_set_cr3(unsigned long value)
> {
>     struct vcpu *v = current;
>     struct page_info *page;
>     unsigned long old;
>
>     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
>          (value != v->arch.hvm_vcpu.guest_cr[3]) )
>     {
>         /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
>         HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
>         page = get_page_from_gfn(v->domain, value >> PAGE_SHIFT,
>                                  NULL, P2M_ALLOC);
>         if ( !page )
>             goto bad_cr3;
>
>         put_page(pagetable_get_page(v->arch.guest_table));
>         v->arch.guest_table = pagetable_from_page(page);
>
>         HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
>     }
>
>     old = v->arch.hvm_vcpu.guest_cr[3];
>     v->arch.hvm_vcpu.guest_cr[3] = value;
>     paging_update_cr3(v);
>     hvm_memory_event_cr3(value, old);
>     return X86EMUL_OKAY;
>
>  bad_cr3:
>     gdprintk(XENLOG_ERR, "Invalid CR3\n");
>     domain_crash(v->domain);
>     return X86EMUL_UNHANDLEABLE;
> }
> 
> Alongside hvm_memory_event_cr3(value, old), have another function that
> checks an array of CR3s and, if v is in user mode, sends out an event?

Nope, I mean trigger an event when the guest vcpu is in _user_ mode
with the right CR3.  You could detect that on the VMEXIT path for example.
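
Sketch of the sort of check I mean, somewhere on the exit path (the
watched-CR3 lookup and the notifier are invented names):

static void check_watched_user_cr3(struct vcpu *v)
{
    struct segment_register ss;

    if ( !cr3_is_watched(v->domain, v->arch.hvm_vcpu.guest_cr[3]) )
        return;

    hvm_get_segment_register(v, x86_seg_ss, &ss);
    if ( ss.attr.fields.dpl == 3 )   /* user mode */
        notify_watched_cr3(v);       /* one-shot mem_event */
}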

> As I've explained in my earlier reply to Tamas, the way we use the
> injection request hypercall now, the conditions for immediate injection
> normally already hold.

Oh, so you don't actually need this delayed/triggered injection right
now?  In that case we should definitely drop all of this in favour of
a simple inject-this-fault-now API.
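
(For reference, roughly what xc_hvm_inject_trap() / HVMOP_inject_trap
already provide -- treat the exact parameters as approximate:

    /* #PF (vector 14), hardware exception, error code 0x4
     * (user-mode read, page not present), cr2 = va */
    xc_hvm_inject_trap(xch, dom, vcpu, 14, HVMOP_TRAP_hw_exc,
                       0x4, 0, va);

with no extra triggering machinery on top.)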

Once you have an actual user of the feature we can come back to it --
after all, then we'll know exactly what's needed.  And if it never
actually is needed we save ourselves a bunch of code. :)

> Also, I'm not sure the "is it user mode?" check would reliably work at
> the time of calling hvm_set_cr3(). Are CR3 loads not always happening in
> kernel-mode?

Yes, indeed.

> > Yeah; I was thinking about page-protecting the process's stack as an
> > approach to this.  Breakpointing the return address might work too but
> > would probably cause more false alarms -- you'd at least need to walk
> > up past the libc/win32 wrappers to avoid trapping every thread.
> > 
> > Ideally there'd be something vcpu-specific we could tinker with
> > (e.g. arranging MSRs so that SYSRET will fault) once we see the right
> > CR3 (assuming intercepting CR3 is cheap enough in this case).
> 
> All valid suggestions; however, they would seem to have a greater impact
> on guest responsiveness. There would be quite a lot of CR3 loads and
> SYSRETs.

Well, you'd only be trapping the CR3 loads, and only to Xen; trapping
CR3 to Xen is measurable but not by any means dreadful -- I'd expect
it to be dwarfed by the other costs of the mem_event user.  The only
SYSRETs that would trap would be the ones already in the right address
space. 

Cheers,

Tim.

* Re: [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc
  2014-09-10 10:39                                     ` George Dunlap
@ 2014-09-10 10:49                                       ` Razvan Cojocaru
  0 siblings, 0 replies; 35+ messages in thread
From: Razvan Cojocaru @ 2014-09-10 10:49 UTC (permalink / raw)
  To: George Dunlap
  Cc: Tian, Kevin, Ian Campbell, Stefano Stabellini, Jun Nakajima,
	Andrew Cooper, Tim Deegan, Dong, Eddie, Jan Beulich,
	Tamas K Lengyel, xen-devel, Ian Jackson

On 09/10/14 13:39, George Dunlap wrote:
> On Wed, Sep 10, 2014 at 9:55 AM, Razvan Cojocaru
> <rcojocaru@bitdefender.com> wrote:
>> On 09/10/2014 11:48 AM, Andrew Cooper wrote:
>>> On 10/09/2014 09:09, Razvan Cojocaru wrote:
>>>> On 09/09/2014 09:38 PM, Tamas K Lengyel wrote:
>>>>>     > But ultimately, as Tim said, you're basically just *hoping* that it
>>>>>     > won't take too long to happen to be at the hypervisor when the proper
>>>>>     > condition happens.  If the process in question isn't getting many
>>>>>     > interrupts, or is spending the vast majority of its time in the
>>>>>     > kernel, you may end up waiting an unbounded amount of time to be able
>>>>>     > to "catch" it in user mode.  It seems like it would be better to find
>>>>>     > a reliable way to trap on the return into user mode, in which case you
>>>>>     > wouldn't need to have a special "wait for this complicated event to
>>>>>     > happen" call at all, would you?
>>>>>
>>>>>     Indeed, but it is assumed that the trap injection request is being made
>>>>>     by the caller in the proper context (when it knows that the condition
>>>>>     will be true sooner rather than later).
>>>>>
>>>>>
>>>>> How is it known that the condition will be true soon? Some more
>>>>> information on what you consider 'proper context' would be valuable.
>>>> It's actually pretty simple for us: the application always requests an
>>>> injection when the guest is already in the address space of the
>>>> interesting application, and in user mode.
>>>
>>> Does this mean that you always request a pagefault as a direct result of
>> a mem_event, when the vcpu is blocked in the correct context?
>>
>> Yes, exactly.
>>
>>> If so, how about extending the mem_event response mechanism with
>>> trap/fault information?
>>
>> For this particular case, that is indeed a very good suggestion -
>> however, things may change. From what I understand, it is likely that in
>> the future we (or somebody else doing memory introspection) will need to
>> request a page fault injection in other cases. The risks described above
>> will of course exist in that case, but they are acceptable.
> 
> Sorry -- do you mean that you don't actually need this functionality
> right now, but you think that maybe someone else might need it, or you
> may need it in the future?  That doesn't sound very promising; at the
> moment it sounds like you're not actually even testing this mechanism
> to make sure that it works the way you hope it does.

No. The functionality _is_ being tested, but currently it's only being
used in the case where we do know that the injection will work immediately.

Also, it's not that someone else might need it. We know our application
will need to fully use it soon. It's not "may", but "will". :)


Thanks,
Razvan Cojocaru

end of thread, other threads:[~2014-09-10 10:49 UTC | newest]

Thread overview: 35+ messages
2014-08-28 11:47 [PATCH RFC V9 1/5] xen: Emulate with no writes Razvan Cojocaru
2014-08-28 11:47 ` [PATCH RFC V9 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
2014-08-28 11:48 ` [PATCH RFC V9 3/5] xen, libxc: Force-enable relevant MSR events Razvan Cojocaru
2014-08-28 11:48 ` [PATCH RFC V9 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
2014-08-28 12:03   ` Jan Beulich
2014-08-28 12:08     ` Razvan Cojocaru
2014-08-28 12:11       ` Jan Beulich
2014-08-28 12:23         ` Razvan Cojocaru
2014-08-28 12:37         ` Razvan Cojocaru
2014-08-29  7:44         ` Razvan Cojocaru
2014-08-29  9:27           ` Jan Beulich
2014-09-01  7:36             ` Razvan Cojocaru
2014-09-01  9:08               ` Jan Beulich
2014-09-01 11:54                 ` Razvan Cojocaru
2014-09-01 12:05                   ` Jan Beulich
2014-09-02  9:18                     ` Razvan Cojocaru
2014-09-02  9:33                       ` Jan Beulich
2014-09-02  9:44                         ` Razvan Cojocaru
2014-09-02 10:08                           ` Jan Beulich
2014-09-02 13:24                       ` Tim Deegan
2014-09-09 16:57                         ` George Dunlap
2014-09-09 17:39                           ` Razvan Cojocaru
2014-09-09 18:38                             ` Tamas K Lengyel
2014-09-10  8:09                               ` Razvan Cojocaru
2014-09-10  8:48                                 ` Andrew Cooper
2014-09-10  8:55                                   ` Razvan Cojocaru
2014-09-10  9:34                                     ` Andrew Cooper
2014-09-10 10:39                                     ` George Dunlap
2014-09-10 10:49                                       ` Razvan Cojocaru
2014-09-09 20:14                           ` Tim Deegan
2014-09-10  9:30                             ` Razvan Cojocaru
2014-09-10  9:59                               ` Tamas K Lengyel
2014-09-10 10:44                               ` Tim Deegan
2014-08-28 11:48 ` [PATCH RFC V9 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
2014-08-28 12:09   ` Jan Beulich
