* [PATCH RFC V6 1/5] xen: Emulate with no writes
From: Razvan Cojocaru @ 2014-08-11 15:08 UTC
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, jbeulich, eddie.dong, jun.nakajima

Added support for emulating an instruction with no memory writes.
Additionally, introduced hvm_emulate_one_full(), which inspects
possible return values from the hvm_emulate_one() functions
(EXCEPTION, UNHANDLEABLE) and acts on them.
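
For illustration only (not part of this patch; the wrapper function
below is hypothetical), a caller reacting to an access violation could
use the new helper as follows, with the trap number and error code that
this series itself passes later on:

/* Hypothetical caller sketch, not part of the patch. */
static void example_react_to_violation(bool_t deny_writes)
{
    /*
     * Emulate the current instruction, optionally discarding its
     * writes; if emulation fails, hvm_emulate_one_full() injects
     * the given exception into the guest instead.
     */
    hvm_emulate_one_full(deny_writes, TRAP_invalid_op,
                         HVM_DELIVER_NO_ERROR_CODE);
}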

Changes since V1:
 - Removed the Linux code that computes the length of an instruction.
 - Unused function parameters are no longer marked.
 - Refactored the code to eliminate redundancy.
 - Made the exception passed on to the guest by hvm_emulate_one_full()
   configurable.

Changes since V2:
 - Renamed hvmemul_write_dummy() to hvmemul_write_discard().
 - Fixed a comment (Xen coding style rules).
 - Renamed hvm_emulate_one_with_ops() to _hvm_emulate_one().
 - Changed stack variable hvm_emulate_ops_no_write into a static const.
 - Modified struct hvm_emulate_ctxt ctx initialization syntax.
 - Renamed unhandleable_trapnr to trapnr and unhandleable_errcode
   to errcode.
 - Changed errcode's type to unsigned int.
 - Removed hvm_emulate_one() loop that went on until it returned
   something other than X86EMUL_RETRY (to prevent potential
   blocking against the time calibration rendezvous).

Changes since V3:
 - The rep_ins, rep_movs and cmpxchg handlers are
   now also inactive.

Changes since V4:
 - Also discarding IO reads (dummy read_io handler).

Changes since V5:
 - Added discard versions of write_io and rep_outs.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/arch/x86/hvm/emulate.c        |  154 ++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/emulate.h |    5 ++
 2 files changed, 156 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index eac159f..9478566 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -688,6 +688,80 @@ static int hvmemul_write(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_write_discard(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    /* Discarding the write. */
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_ins_discard(
+    uint16_t src_port,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_movs_discard(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_outs_discard(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    uint16_t dst_port,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_cmpxchg_discard(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_old,
+    void *p_new,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_io_discard(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_io_discard(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -1138,8 +1212,33 @@ static const struct x86_emulate_ops hvm_emulate_ops = {
     .invlpg        = hvmemul_invlpg
 };
 
-int hvm_emulate_one(
-    struct hvm_emulate_ctxt *hvmemul_ctxt)
+static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
+    .read          = hvmemul_read,
+    .insn_fetch    = hvmemul_insn_fetch,
+    .write         = hvmemul_write_discard,
+    .cmpxchg       = hvmemul_cmpxchg_discard,
+    .rep_ins       = hvmemul_rep_ins_discard,
+    .rep_outs      = hvmemul_rep_outs_discard,
+    .rep_movs      = hvmemul_rep_movs_discard,
+    .read_segment  = hvmemul_read_segment,
+    .write_segment = hvmemul_write_segment,
+    .read_io       = hvmemul_read_io_discard,
+    .write_io      = hvmemul_write_io_discard,
+    .read_cr       = hvmemul_read_cr,
+    .write_cr      = hvmemul_write_cr,
+    .read_msr      = hvmemul_read_msr,
+    .write_msr     = hvmemul_write_msr,
+    .wbinvd        = hvmemul_wbinvd,
+    .cpuid         = hvmemul_cpuid,
+    .inject_hw_exception = hvmemul_inject_hw_exception,
+    .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
+    .get_fpu       = hvmemul_get_fpu,
+    .put_fpu       = hvmemul_put_fpu,
+    .invlpg        = hvmemul_invlpg
+};
+
+static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const struct x86_emulate_ops *ops)
 {
     struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
     struct vcpu *curr = current;
@@ -1191,7 +1290,7 @@ int hvm_emulate_one(
     vio->mmio_retrying = vio->mmio_retry;
     vio->mmio_retry = 0;
 
-    rc = x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
+    rc = x86_emulate(&hvmemul_ctxt->ctxt, ops);
 
     if ( rc == X86EMUL_OKAY && vio->mmio_retry )
         rc = X86EMUL_RETRY;
@@ -1239,6 +1338,55 @@ int hvm_emulate_one(
     return X86EMUL_OKAY;
 }
 
+int hvm_emulate_one(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops);
+}
+
+int hvm_emulate_one_no_write(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
+}
+
+void hvm_emulate_one_full(bool_t nowrite, unsigned int trapnr,
+    unsigned int errcode)
+{
+    struct hvm_emulate_ctxt ctx = {{ 0 }};
+    int rc;
+
+    hvm_emulate_prepare(&ctx, guest_cpu_user_regs());
+
+    if ( nowrite )
+        rc = hvm_emulate_one_no_write(&ctx);
+    else
+        rc = hvm_emulate_one(&ctx);
+
+    switch ( rc )
+    {
+    case X86EMUL_UNHANDLEABLE:
+        gdprintk(XENLOG_DEBUG, "Emulation failed @ %04x:%lx: "
+               "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
+               hvmemul_get_seg_reg(x86_seg_cs, &ctx)->sel,
+               ctx.insn_buf_eip,
+               ctx.insn_buf[0], ctx.insn_buf[1],
+               ctx.insn_buf[2], ctx.insn_buf[3],
+               ctx.insn_buf[4], ctx.insn_buf[5],
+               ctx.insn_buf[6], ctx.insn_buf[7],
+               ctx.insn_buf[8], ctx.insn_buf[9]);
+        hvm_inject_hw_exception(trapnr, errcode);
+        break;
+    case X86EMUL_EXCEPTION:
+        if ( ctx.exn_pending )
+            hvm_inject_hw_exception(ctx.exn_vector, ctx.exn_error_code);
+        /* fall through */
+    default:
+        hvm_emulate_writeback(&ctx);
+        break;
+    }
+}
+
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs)
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 00a06cc..a74f310 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -37,6 +37,11 @@ struct hvm_emulate_ctxt {
 
 int hvm_emulate_one(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
+int hvm_emulate_one_no_write(
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+void hvm_emulate_one_full(bool_t nowrite,
+    unsigned int trapnr,
+    unsigned int errcode);
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs);
-- 
1.7.9.5

* [PATCH RFC V6 2/5] xen: Optimize introspection access to guest state
From: Razvan Cojocaru @ 2014-08-11 15:08 UTC
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, jbeulich, eddie.dong, jun.nakajima

Speed optimization for introspection purposes: a handful of registers
are sent along with each mem_event. This requires enlargement of the
mem_event_request / mem_event_response structures, and additional code
to fill in the relevant values. Since the EPT event processing code
needs more data than the CR3 or MSR event processors,
hvm_mem_event_fill_regs() fills in less data than
p2m_mem_event_fill_regs(), in order to avoid overhead. Struct
hvm_hw_cpu was considered instead of the custom struct
mem_event_regs_st, but its size would fill up the mem_event ring
buffer too quickly.
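
As a purely illustrative sketch of the consumer side (not part of this
patch; the function name and the printf formatting are assumptions), an
introspection agent can read the registers straight out of each request,
without any extra hypercall per event:

#include <stdio.h>
#include <inttypes.h>

/* Hypothetical dom0 consumer sketch; mem_event_request_t and
 * struct mem_event_regs_x86 come from the public mem_event.h
 * header extended below. */
static void example_dump_request(const mem_event_request_t *req)
{
    const struct mem_event_regs_x86 *regs = &req->x86_regs;

    printf("vcpu %u: rip=%016" PRIx64 " cr3=%016" PRIx64
           " rsp=%016" PRIx64 "\n",
           req->vcpu_id, regs->rip, regs->cr3, regs->rsp);
}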

Changes since V1:
 - Replaced guest_x86_mode with cs_arbytes in the mem_event_regs_st
   structure.
 - Removed superfluous preprocessor check for __x86_64__ in
   p2m_mem_event_fill_regs().

Changes since V2:
 - Removed inline function compiler suggestions.
 - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
 - Padded struct mem_event_regs_st.
 - Renamed mem_event_regs_t to mem_event_regs.
 - Readability changes.

Changes since V3:
 - Changed the name of VCPU variables referring to the current
   VCPU to "curr".
 - Renamed "mem_event_regs" to "x86_mem_event_regs" to make it
   explicit.

Changes since V4:
 - Renamed "x86_mem_event_regs" to "mem_event_regs_x86" and
   removed a typedef.

Changes since V5:
 - Const-correctness.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/arch/x86/hvm/hvm.c         |   33 +++++++++++++++++++++++
 xen/arch/x86/mm/p2m.c          |   57 ++++++++++++++++++++++++++++++++++++++++
 xen/include/public/mem_event.h |   39 +++++++++++++++++++++++++++
 3 files changed, 129 insertions(+)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index e834406..c77dfbb 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -6127,6 +6127,38 @@ int hvm_debug_op(struct vcpu *v, int32_t op)
     return rc;
 }
 
+static void hvm_mem_event_fill_regs(mem_event_request_t *req)
+{
+    const struct cpu_user_regs *regs = guest_cpu_user_regs();
+    const struct vcpu *curr = current;
+
+    req->x86_regs.rax = regs->eax;
+    req->x86_regs.rcx = regs->ecx;
+    req->x86_regs.rdx = regs->edx;
+    req->x86_regs.rbx = regs->ebx;
+    req->x86_regs.rsp = regs->esp;
+    req->x86_regs.rbp = regs->ebp;
+    req->x86_regs.rsi = regs->esi;
+    req->x86_regs.rdi = regs->edi;
+
+    req->x86_regs.r8  = regs->r8;
+    req->x86_regs.r9  = regs->r9;
+    req->x86_regs.r10 = regs->r10;
+    req->x86_regs.r11 = regs->r11;
+    req->x86_regs.r12 = regs->r12;
+    req->x86_regs.r13 = regs->r13;
+    req->x86_regs.r14 = regs->r14;
+    req->x86_regs.r15 = regs->r15;
+
+    req->x86_regs.rflags = regs->eflags;
+    req->x86_regs.rip    = regs->eip;
+
+    req->x86_regs.msr_efer = curr->arch.hvm_vcpu.guest_efer;
+    req->x86_regs.cr0 = curr->arch.hvm_vcpu.guest_cr[0];
+    req->x86_regs.cr3 = curr->arch.hvm_vcpu.guest_cr[3];
+    req->x86_regs.cr4 = curr->arch.hvm_vcpu.guest_cr[4];
+}
+
 static int hvm_memory_event_traps(long p, uint32_t reason,
                                   unsigned long value, unsigned long old, 
                                   bool_t gla_valid, unsigned long gla) 
@@ -6171,6 +6203,7 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
         req.gla = old;
     }
     
+    hvm_mem_event_fill_regs(&req);
     mem_event_put_request(d, &d->mem_event->access, &req);
     
     return 1;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index bca9f0f..41a316a 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1323,6 +1323,61 @@ void p2m_mem_paging_resume(struct domain *d)
     }
 }
 
+static void p2m_mem_event_fill_regs(mem_event_request_t *req)
+{
+    const struct cpu_user_regs *regs = guest_cpu_user_regs();
+    struct segment_register seg;
+    struct hvm_hw_cpu ctxt;
+    struct vcpu *curr = current;
+
+    /* Architecture-specific vmcs/vmcb bits */
+    hvm_funcs.save_cpu_ctxt(curr, &ctxt);
+
+    req->x86_regs.rax = regs->eax;
+    req->x86_regs.rcx = regs->ecx;
+    req->x86_regs.rdx = regs->edx;
+    req->x86_regs.rbx = regs->ebx;
+    req->x86_regs.rsp = regs->esp;
+    req->x86_regs.rbp = regs->ebp;
+    req->x86_regs.rsi = regs->esi;
+    req->x86_regs.rdi = regs->edi;
+
+    req->x86_regs.r8  = regs->r8;
+    req->x86_regs.r9  = regs->r9;
+    req->x86_regs.r10 = regs->r10;
+    req->x86_regs.r11 = regs->r11;
+    req->x86_regs.r12 = regs->r12;
+    req->x86_regs.r13 = regs->r13;
+    req->x86_regs.r14 = regs->r14;
+    req->x86_regs.r15 = regs->r15;
+
+    req->x86_regs.rflags = regs->eflags;
+    req->x86_regs.rip    = regs->eip;
+
+    req->x86_regs.dr7 = curr->arch.debugreg[7];
+    req->x86_regs.cr0 = ctxt.cr0;
+    req->x86_regs.cr2 = ctxt.cr2;
+    req->x86_regs.cr3 = ctxt.cr3;
+    req->x86_regs.cr4 = ctxt.cr4;
+
+    req->x86_regs.sysenter_cs = ctxt.sysenter_cs;
+    req->x86_regs.sysenter_esp = ctxt.sysenter_esp;
+    req->x86_regs.sysenter_eip = ctxt.sysenter_eip;
+
+    req->x86_regs.msr_efer = ctxt.msr_efer;
+    req->x86_regs.msr_star = ctxt.msr_star;
+    req->x86_regs.msr_lstar = ctxt.msr_lstar;
+
+    hvm_get_segment_register(curr, x86_seg_fs, &seg);
+    req->x86_regs.fs_base = seg.base;
+
+    hvm_get_segment_register(curr, x86_seg_gs, &seg);
+    req->x86_regs.gs_base = seg.base;
+
+    hvm_get_segment_register(curr, x86_seg_cs, &seg);
+    req->x86_regs.cs_arbytes = seg.attr.bytes;
+}
+
 bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, 
                           bool_t access_r, bool_t access_w, bool_t access_x,
                           mem_event_request_t **req_ptr)
@@ -1410,6 +1465,8 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
         req->access_x = access_x;
     
         req->vcpu_id = v->vcpu_id;
+
+        p2m_mem_event_fill_regs(req);
     }
 
     /* Pause the current VCPU */
diff --git a/xen/include/public/mem_event.h b/xen/include/public/mem_event.h
index 3831b41..bbd1636 100644
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -48,6 +48,44 @@
 #define MEM_EVENT_REASON_MSR         7    /* MSR was hit: gfn is MSR value, gla is MSR address;
                                              does NOT honour HVMPME_onchangeonly */
 
+/* Using a custom struct (not hvm_hw_cpu) so as to not fill
+ * the mem_event ring buffer too quickly. */
+struct mem_event_regs_x86 {
+    uint64_t rax;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbx;
+    uint64_t rsp;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+    uint64_t rflags;
+    uint64_t dr7;
+    uint64_t rip;
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    uint64_t msr_efer;
+    uint64_t msr_star;
+    uint64_t msr_lstar;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint32_t cs_arbytes;
+    uint32_t _pad;
+};
+
 typedef struct mem_event_st {
     uint32_t flags;
     uint32_t vcpu_id;
@@ -65,6 +103,7 @@ typedef struct mem_event_st {
     uint16_t available:12;
 
     uint16_t reason;
+    struct mem_event_regs_x86 x86_regs;
 } mem_event_request_t, mem_event_response_t;
 
 DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t);
-- 
1.7.9.5

* [PATCH RFC V6 3/5] xen, libxc: Force-enable relevant MSR events
From: Razvan Cojocaru @ 2014-08-11 15:08 UTC
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, jbeulich, eddie.dong, jun.nakajima

vmx_disable_intercept_for_msr() will now refuse to disable interception of
MSRs needed for memory introspection. It is not possible to gate this on
mem_access being active for the domain, since by the time mem_access does
become active the interception for the interesting MSRs has already been
disabled (vmx_disable_intercept_for_msr() runs very early on).
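
For illustration (not part of this patch; the wrapper below is
hypothetical), a monitoring application opts into this behaviour via
the new libxc call, which maps and returns the mem_event ring page
just like xc_mem_access_enable() does:

#include <stdio.h>
#include <xenctrl.h>

/* Hypothetical sketch of enabling introspection-aware mem_access. */
static void *example_enable_introspection(xc_interface *xch,
                                          domid_t domid, uint32_t *port)
{
    /* Also sets introspection_enabled and re-enables the relevant
     * MSR write intercepts for the domain. */
    void *ring_page = xc_mem_access_enable_introspection(xch, domid, port);

    if ( ring_page == NULL )
        fprintf(stderr, "enabling introspection mem_access failed\n");

    return ring_page;
}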

Changes since V1:
 - Replaced printk() with gdprintk(XENLOG_DEBUG, ...).

Changes since V2:
 - Split a log line differently to keep it grepable.
 - Interception for relevant MSRs will be disabled only if
   mem_access is not enabled.
 - Since they end up being disabled early on (when mem_access
   is not enabled yet), re-enable interception when mem_access
   becomes active.

Changes since V3:
 - Removed log line stating that MSR interception cannot be disabled.
 - Removed superfluous #include <asm/hvm/vmx/vmcs.h>.
 - Moved VMX-specific code to vmx.c (as a new hvm_funcs member).

Changes since V5:
 - Added xc_mem_access_enable_introspection() to libxc, which has
   the same parameters and semantics as xc_mem_access_enable(),
   but it additionally sets d->arch.hvm_domain.introspection_enabled
   and enables relevant MSR interception.
 - Renamed vmx_enable_intro_msr_interception() to
   vmx_enable_msr_exit_interception().
 - Simplified vmx_enable_msr_exit_interception() (const array of MSRs).

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 tools/libxc/xc_mem_access.c      |   10 +++++++++-
 tools/libxc/xc_mem_event.c       |    7 +++++--
 tools/libxc/xc_private.h         |    2 +-
 tools/libxc/xenctrl.h            |    2 ++
 xen/arch/x86/hvm/vmx/vmcs.c      |   21 +++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c       |   18 ++++++++++++++++++
 xen/arch/x86/mm/mem_event.c      |   13 +++++++++++++
 xen/include/asm-x86/hvm/domain.h |    2 ++
 xen/include/asm-x86/hvm/hvm.h    |    2 ++
 xen/include/public/domctl.h      |    7 ++++---
 10 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/tools/libxc/xc_mem_access.c b/tools/libxc/xc_mem_access.c
index 461f0e9..55d0e9f 100644
--- a/tools/libxc/xc_mem_access.c
+++ b/tools/libxc/xc_mem_access.c
@@ -26,7 +26,15 @@
 
 void *xc_mem_access_enable(xc_interface *xch, domid_t domain_id, uint32_t *port)
 {
-    return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN, port);
+    return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN,
+                               port, 0);
+}
+
+void *xc_mem_access_enable_introspection(xc_interface *xch, domid_t domain_id,
+                                         uint32_t *port)
+{
+    return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN,
+                               port, 1);
 }
 
 int xc_mem_access_disable(xc_interface *xch, domid_t domain_id)
diff --git a/tools/libxc/xc_mem_event.c b/tools/libxc/xc_mem_event.c
index faf1cc6..8c0be4e 100644
--- a/tools/libxc/xc_mem_event.c
+++ b/tools/libxc/xc_mem_event.c
@@ -57,7 +57,7 @@ int xc_mem_event_memop(xc_interface *xch, domid_t domain_id,
 }
 
 void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
-                          uint32_t *port)
+                          uint32_t *port, int enable_introspection)
 {
     void *ring_page = NULL;
     uint64_t pfn;
@@ -120,7 +120,10 @@ void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
         break;
 
     case HVM_PARAM_ACCESS_RING_PFN:
-        op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE;
+        if ( enable_introspection )
+            op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION;
+        else
+            op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE;
         mode = XEN_DOMCTL_MEM_EVENT_OP_ACCESS;
         break;
 
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index c50a7c9..94df688 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -381,6 +381,6 @@ int xc_mem_event_memop(xc_interface *xch, domid_t domain_id,
  * param can be HVM_PARAM_PAGING/ACCESS/SHARING_RING_PFN
  */
 void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param,
-                          uint32_t *port);
+                          uint32_t *port, int enable_introspection);
 
 #endif /* __XC_PRIVATE_H__ */
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 5beb846..f7785ec 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -2263,6 +2263,8 @@ int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
  * Caller has to unmap this page when done.
  */
 void *xc_mem_access_enable(xc_interface *xch, domid_t domain_id, uint32_t *port);
+void *xc_mem_access_enable_introspection(xc_interface *xch, domid_t domain_id,
+                                         uint32_t *port);
 int xc_mem_access_disable(xc_interface *xch, domid_t domain_id);
 int xc_mem_access_resume(xc_interface *xch, domid_t domain_id);
 
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 8ffc562..2ac2872 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -39,6 +39,7 @@
 #include <xen/keyhandler.h>
 #include <asm/shadow.h>
 #include <asm/tboot.h>
+#include <asm/mem_event.h>
 
 static bool_t __read_mostly opt_vpid_enabled = 1;
 boolean_param("vpid", opt_vpid_enabled);
@@ -695,11 +696,31 @@ static void vmx_set_host_env(struct vcpu *v)
 void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
 {
     unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
+    struct domain *d = v->domain;
 
     /* VMX MSR bitmap supported? */
     if ( msr_bitmap == NULL )
         return;
 
+    if ( mem_event_check_ring(&d->mem_event->access) &&
+         d->arch.hvm_domain.introspection_enabled )
+    {
+        /* Filter out MSR-s needed for memory introspection */
+        switch ( msr )
+        {
+        case MSR_IA32_SYSENTER_EIP:
+        case MSR_IA32_SYSENTER_ESP:
+        case MSR_IA32_SYSENTER_CS:
+        case MSR_IA32_MC0_CTL:
+        case MSR_STAR:
+        case MSR_LSTAR:
+            return;
+
+        default:
+            break;
+        }
+    }
+
     /*
      * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
      * have the write-low and read-high bitmap offsets the wrong way round.
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 2caa04a..4cab482 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1682,6 +1682,23 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
         *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
 }
 
+static void vmx_enable_msr_exit_interception(struct domain *d)
+{
+    struct vcpu *v;
+    const u32 msrs[] = { MSR_IA32_SYSENTER_EIP,
+                         MSR_IA32_SYSENTER_ESP,
+                         MSR_IA32_SYSENTER_CS,
+                         MSR_IA32_MC0_CTL,
+                         MSR_STAR,
+                         MSR_LSTAR };
+    int i;
+
+    /* Enable interception for MSRs needed for memory introspection. */
+    for_each_vcpu ( d, v )
+        for ( i = 0; i < ARRAY_SIZE(msrs); i++ )
+            vmx_enable_intercept_for_msr(v, msrs[i], MSR_TYPE_W);
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -1740,6 +1757,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .handle_eoi           = vmx_handle_eoi,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
+    .enable_msr_exit_interception = vmx_enable_msr_exit_interception,
 };
 
 const struct hvm_function_table * __init start_vmx(void)
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index ba7e71e..84a1d8d 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -587,6 +587,7 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         switch( mec->op )
         {
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
+        case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION:
         {
             rc = -ENODEV;
             /* Only HAP is supported */
@@ -600,13 +601,25 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
             rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
                                     HVM_PARAM_ACCESS_RING_PFN,
                                     mem_access_notification);
+
+            if ( mec->op == XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE )
+                break;
+
+            if ( rc == 0 && hvm_funcs.enable_msr_exit_interception )
+            {
+                d->arch.hvm_domain.introspection_enabled = 1;
+                hvm_funcs.enable_msr_exit_interception(d);
+            }
         }
         break;
 
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
         {
             if ( med->ring_page )
+            {
                 rc = mem_event_disable(d, med);
+                d->arch.hvm_domain.introspection_enabled = 0;
+            }
         }
         break;
 
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 291a2e0..947b061 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -145,6 +145,8 @@ struct hvm_domain {
         struct vmx_domain vmx;
         struct svm_domain svm;
     };
+
+    bool_t introspection_enabled;
 };
 
 #define hap_enabled(d)  ((d)->arch.hvm_domain.hap_enabled)
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0ebd478..069ba39 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -205,6 +205,8 @@ struct hvm_function_table {
     void (*hypervisor_cpuid_leaf)(uint32_t sub_idx,
                                   uint32_t *eax, uint32_t *ebx,
                                   uint32_t *ecx, uint32_t *edx);
+
+    void (*enable_msr_exit_interception)(struct domain *d);
 };
 
 extern struct hvm_function_table hvm_funcs;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..a3431ec 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -772,10 +772,11 @@ struct xen_domctl_gdbsx_domstatus {
  * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
  * EBUSY  - guest has or had access enabled, ring buffer still active
  */
-#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS            2
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS                        2
 
-#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE     0
-#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE    1
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE                 0
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE                1
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION   2
 
 /*
  * Sharing ENOMEM helper.
-- 
1.7.9.5

* [PATCH RFC V6 4/5] xen, libxc: Request page fault injection via libxc
From: Razvan Cojocaru @ 2014-08-11 15:08 UTC
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, jbeulich, eddie.dong, jun.nakajima

Added a new XEN_DOMCTL_set_pagefault_info hypercall, used by libxc's
new xc_domain_set_pagefault_info() function to set per-domain page
fault injection information. The call itself only records this
information; the actual injection happens at VMENTRY time, and only
if all conditions are met (the guest is in user mode, the configured
address space matches CR3, and there are no other pending traps).
This mechanism allows bringing in swapped-out pages for inspection.
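
For illustration (not part of this patch; the wrapper below is
hypothetical), a caller of the new libxc function fills in the target
address space and virtual address, then lets the next suitable VMENTRY
do the rest:

#include <string.h>
#include <xenctrl.h>

/* Hypothetical sketch: request a #PF so a swapped-out page gets
 * brought back in by the guest's own fault handling. */
static int example_request_page_in(xc_interface *xch, uint32_t domid,
                                   uint64_t cr3, uint64_t va)
{
    xen_domctl_set_pagefault_info_t info;

    memset(&info, 0, sizeof(info));
    info.address_space   = cr3; /* inject only while this CR3 is active */
    info.virtual_address = va;  /* the page we want paged back in */
    info.write_access    = 0;   /* a read fault suffices for inspection */

    return xc_domain_set_pagefault_info(xch, domid, &info);
}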

Changes since V2:
 - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
 - Now checking SS.DPL instead of CS.DPL to see if the guest is in
   user mode.
 - Removed superfluous fault_info members initialization.
 - Now returning -EINVAL if !has_hvm_container_domain(d) on
   XEN_DOMCTL_set_pagefault_info.
 - Moved struct fault_info from struct domain to struct hvm_domain.
 - Collapsed explicit initialization of the fault_info members into a
   one-line assignment on the tools/libxc side.
 - Now using write_access as a proper bool instead of shifting and
   OR-ing it with PFEC_user_mode.

Changes since V3:
 - Now checking that is_hvm_domain(d) in do_domctl() (consistent with
   the test in the actual page fault injection code).
 - XEN_DOMCTL_set_pagefault_info now extends the proper range.
 - Added a likely() around the
   d->arch.hvm_domain.fault_info.virtual_address == 0 check.
 - Replaced hvm_funcs.save_cpu_ctxt() function call with lighter code.
 - Split the vmx.c page fault injection function into a check
   function and an inject function.

Changes since V4:
 - Added "valid" flag instead of relying on virtual address 0 as a
   special case.
 - Using #defines instead of magic constants in vmx_vmcs_save().
 - Padded struct xen_domctl_set_pagefault_info.
 - Coding style and readability changes.

Changes since V5:
 - Renamed "struct domain *d" to "currd" where appropriate.
 - Using ! to test bool_t variables.
 - Replaced hvm_get_segment_register() with
   vmx_get_segment_register() in vmx.c.
 - Removed double blank line.
 - Proper comparison of CR3 and address_space (taking into account
   LM, PAE).
 - Using domain variable already set up in do_domctl().
 - Removed optional parentheses.
 - Removed d->arch.hvm_domain.fault_info reset (only the "valid"
   flag counts anyway).

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 tools/libxc/xc_domain.c          |   16 +++++++++++
 tools/libxc/xenctrl.h            |    4 +++
 xen/arch/x86/hvm/vmx/vmx.c       |   59 ++++++++++++++++++++++++++++++++++++++
 xen/common/domctl.c              |   19 ++++++++++++
 xen/include/asm-x86/hvm/domain.h |    8 ++++++
 xen/include/public/domctl.h      |   15 ++++++++++
 6 files changed, 121 insertions(+)

diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 0230c6c..49f0c61 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -506,6 +506,22 @@ int xc_domain_hvm_setcontext(xc_interface *xch,
     return ret;
 }
 
+int xc_domain_set_pagefault_info(xc_interface *xch,
+                                 uint32_t domid,
+                                 xen_domctl_set_pagefault_info_t *info)
+{
+    DECLARE_DOMCTL;
+
+    if (info == NULL)
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_set_pagefault_info;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_pagefault_info = *info;
+
+    return do_domctl(xch, &domctl);
+}
+
 int xc_vcpu_getcontext(xc_interface *xch,
                        uint32_t domid,
                        uint32_t vcpu,
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index f7785ec..85273b9 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -803,6 +803,10 @@ int xc_domain_hvm_setcontext(xc_interface *xch,
 const char *xc_domain_get_native_protocol(xc_interface *xch,
                                           uint32_t domid);
 
+int xc_domain_set_pagefault_info(xc_interface *xch,
+                                 uint32_t domid,
+                                 xen_domctl_set_pagefault_info_t *info);
+
 /**
  * This function returns information about the execution context of a
  * particular vcpu of a domain.
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 4cab482..4dba062 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3115,6 +3115,62 @@ out:
         nvmx_idtv_handling();
 }
 
+static bool_t vmx_check_pf_injection(void)
+{
+    struct vcpu *curr = current;
+    const struct domain *currd = curr->domain;
+    struct segment_register seg;
+    unsigned long ev;
+    uint32_t pending_event = 0;
+    uint64_t mask;
+
+    if ( !is_hvm_domain(currd) ||
+         likely(!currd->arch.hvm_domain.fault_info.valid) )
+        return 0;
+
+    vmx_get_segment_register(curr, x86_seg_ss, &seg);
+
+    if ( seg.attr.fields.dpl != 3 ) /* Guest is not in user mode */
+        return 0;
+
+    if ( hvm_long_mode_enabled(curr) )
+        mask = 0x000ffffffffff000; /* Bits 51:12. */
+    else if ( hvm_pae_enabled(curr) )
+        mask = 0x00000000ffffffe0; /* Bits 31:5. */
+    else
+        mask = 0x00000000fffff000; /* Bits 31:12. */
+
+    if ( (curr->arch.hvm_vcpu.guest_cr[3] & mask) !=
+         (currd->arch.hvm_domain.fault_info.address_space & mask) )
+        return 0;
+
+    vmx_vmcs_enter(curr);
+    __vmread(VM_ENTRY_INTR_INFO, &ev);
+
+    if ( (ev & INTR_INFO_VALID_MASK) &&
+         hvm_event_needs_reinjection(MASK_EXTR(ev, INTR_INFO_INTR_TYPE_MASK),
+                                     ev & INTR_INFO_VECTOR_MASK) )
+        pending_event = ev;
+
+    vmx_vmcs_exit(curr);
+    return pending_event == 0;
+}
+
+static void vmx_inject_pf(void)
+{
+    struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
+    int errcode = PFEC_user_mode;
+    uint64_t virtual_address = currd->arch.hvm_domain.fault_info.virtual_address;
+
+    currd->arch.hvm_domain.fault_info.valid = 0;
+
+    if ( currd->arch.hvm_domain.fault_info.write_access )
+        errcode |= PFEC_write_access;
+
+    hvm_inject_page_fault(errcode, virtual_address);
+}
+
 void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3155,6 +3211,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(need_flush) )
         vpid_sync_all();
 
+    if ( vmx_check_pf_injection() )
+        vmx_inject_pf();
+
  out:
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index c326aba..b56bd69 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -967,6 +967,25 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_set_pagefault_info:
+    {
+        ret = -EINVAL;
+
+        if ( is_hvm_domain(d) )
+        {
+            d->arch.hvm_domain.fault_info.address_space =
+                op->u.set_pagefault_info.address_space;
+            d->arch.hvm_domain.fault_info.virtual_address =
+                op->u.set_pagefault_info.virtual_address;
+            d->arch.hvm_domain.fault_info.write_access =
+                op->u.set_pagefault_info.write_access;
+            d->arch.hvm_domain.fault_info.valid = 1;
+
+            ret = 0;
+        }
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 947b061..f6d61ce 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -141,6 +141,14 @@ struct hvm_domain {
      */
     uint64_t sync_tsc;
 
+    /* Memory introspection page fault injection data. */
+    struct {
+        uint64_t address_space;
+        uint64_t virtual_address;
+        uint32_t write_access;
+        bool_t valid;
+    } fault_info;
+
     union {
         struct vmx_domain vmx;
         struct svm_domain svm;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index a3431ec..86b0de6 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -937,6 +937,19 @@ typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
 #endif
 
+/* XEN_DOMCTL_set_pagefault_info requests that a page fault occur at
+ * the next VMENTRY.
+ */
+struct xen_domctl_set_pagefault_info {
+    uint64_t address_space;
+    uint64_t virtual_address;
+    uint32_t write_access;
+    uint32_t _pad;
+};
+typedef struct xen_domctl_set_pagefault_info xen_domctl_set_pagefault_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_pagefault_info_t);
+
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -1009,6 +1022,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_get_vcpu_msrs                 72
 #define XEN_DOMCTL_set_vcpu_msrs                 73
+#define XEN_DOMCTL_set_pagefault_info            74
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1069,6 +1083,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_set_pagefault_info set_pagefault_info;
         uint8_t                             pad[128];
     } u;
 };
-- 
1.7.9.5

* [PATCH RFC V6 5/5] xen: Handle resumed instruction based on previous mem_event reply
From: Razvan Cojocaru @ 2014-08-11 15:08 UTC
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, jbeulich, eddie.dong, jun.nakajima

In a scenario where a page fault has triggered a mem_event,
p2m_mem_access_check() will now be able to either 1) emulate the
current instruction, or 2) emulate it without allowing it to perform
any writes.
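
For illustration (not part of this patch; the helper below is
hypothetical), a mem_event listener selects between the two behaviours
by setting the new flags in its response before putting it on the ring:

/* Hypothetical listener-side sketch. */
static void example_fill_response(const mem_event_request_t *req,
                                  mem_event_response_t *rsp,
                                  int allow_writes)
{
    rsp->vcpu_id = req->vcpu_id;
    rsp->gfn     = req->gfn;
    rsp->flags   = req->flags;  /* e.g. keep MEM_EVENT_FLAG_VCPU_PAUSED */

    /* Have Xen emulate the faulting instruction upon resume... */
    rsp->flags |= MEM_EVENT_FLAG_EMULATE;

    /* ...optionally discarding all of its writes. */
    if ( !allow_writes )
        rsp->flags |= MEM_EVENT_FLAG_EMULATE_NOWRITE;
}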

Changes since V1:
 - Removed the 'skip' code which required computing the current
   instruction length.
 - Removed the set_ad_bits() code that attempted to modify the
   'accessed' and 'dirty' bits for instructions that the emulator
   can't handle at the moment.

Changes since V2:
 - Moved the __vmread(EXIT_QUALIFICATION, &exit_qualification); code
   in vmx.c, accessible via hvm_funcs.
 - Incorporated changes by Andrew Cooper ("[PATCH 1/2] Xen/mem_event:
   Validate the response vcpu_id before acting on it").

Changes since V3:
 - Collapsed verbose lines into a single "else if()".
 - Changed an int to bool_t.
 - Fixed a minor coding style issue.
 - Now computing the first parameter to hvm_emulate_one_full()
   (replaced an "if" with a single call).
 - Added code comments about eip and gla reset (clarity issue).
 - Removed duplicate code by Andrew Cooper (introduced in V2,
   since committed).

Changes since V4:
 - Renamed "exited_by_pagefault" to "exited_by_nested_pagefault".
 - Folded two long lines.
 - Combined two "if" statements.
 - Moved "violation = 0;" to the XENMEM_access_rwx case.

Changes since V5:
 - Fixed indentation.
 - Re-ordered a set of if() conditions.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/arch/x86/domain.c          |    3 ++
 xen/arch/x86/hvm/vmx/vmx.c     |   13 +++++++
 xen/arch/x86/mm/p2m.c          |   84 ++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/domain.h   |    9 +++++
 xen/include/asm-x86/hvm/hvm.h  |    2 +
 xen/include/public/mem_event.h |   12 +++---
 6 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f7e0e78..7b1dfe6 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -415,6 +415,9 @@ int vcpu_initialise(struct vcpu *v)
 
     v->arch.flags = TF_kernel_mode;
 
+    /* By default, do not emulate */
+    v->arch.mem_event.emulate_flags = 0;
+
     rc = mapcache_vcpu_init(v);
     if ( rc )
         return rc;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 4dba062..5988c7a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1699,6 +1699,18 @@ static void vmx_enable_msr_exit_interception(struct domain *d)
             vmx_enable_intercept_for_msr(v, msrs[i], MSR_TYPE_W);
 }
 
+static bool_t vmx_exited_by_nested_pagefault(void)
+{
+    unsigned long exit_qualification;
+
+    __vmread(EXIT_QUALIFICATION, &exit_qualification);
+
+    if ( (exit_qualification & EPT_GLA_FAULT) == 0 )
+        return 0;
+
+    return 1;
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -1758,6 +1770,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
     .enable_msr_exit_interception = vmx_enable_msr_exit_interception,
+    .exited_by_nested_pagefault  = vmx_exited_by_nested_pagefault,
 };
 
 const struct hvm_function_table * __init start_vmx(void)
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 41a316a..98b2eba 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1391,6 +1391,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
     p2m_access_t p2ma;
     mem_event_request_t *req;
     int rc;
+    unsigned long eip = guest_cpu_user_regs()->eip;
 
     /* First, handle rx2rw conversion automatically.
      * These calls to p2m->set_entry() must succeed: we have the gfn
@@ -1443,6 +1444,36 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
             return 1;
         }
     }
+    else if ( v->arch.mem_event.emulate_flags == 0 &&
+              hvm_funcs.exited_by_nested_pagefault &&
+              !hvm_funcs.exited_by_nested_pagefault() ) /* don't send a mem_event */
+    {
+        v->arch.mem_event.emulate_flags = MEM_EVENT_FLAG_EMULATE;
+        v->arch.mem_event.gpa = gpa;
+        v->arch.mem_event.eip = eip;
+    }
+
+    /* The previous mem_event reply does not match the current state. */
+    if ( v->arch.mem_event.gpa != gpa || v->arch.mem_event.eip != eip )
+    {
+        /* Don't emulate the current instruction, send a new mem_event. */
+        v->arch.mem_event.emulate_flags = 0;
+
+        /* Make sure to mark the current state to match it again against
+         * the new mem_event about to be sent. */
+        v->arch.mem_event.gpa = gpa;
+        v->arch.mem_event.eip = eip;
+    }
+
+    if ( v->arch.mem_event.emulate_flags )
+    {
+        hvm_emulate_one_full((v->arch.mem_event.emulate_flags &
+                              MEM_EVENT_FLAG_EMULATE_NOWRITE) != 0,
+                             TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
+
+        v->arch.mem_event.emulate_flags = 0;
+        return 1;
+    }
 
     *req_ptr = NULL;
     req = xzalloc(mem_event_request_t);
@@ -1495,6 +1526,59 @@ void p2m_mem_access_resume(struct domain *d)
 
         v = d->vcpu[rsp.vcpu_id];
 
+        /* Mark vcpu for skipping one instruction upon rescheduling */
+        if ( rsp.flags & MEM_EVENT_FLAG_EMULATE )
+        {
+            xenmem_access_t access;
+            bool_t violation = 1;
+
+            v->arch.mem_event.emulate_flags = 0;
+
+            if ( p2m_get_mem_access(d, rsp.gfn, &access) == 0 )
+            {
+                switch ( access )
+                {
+                case XENMEM_access_n:
+                case XENMEM_access_n2rwx:
+                default:
+                    violation = rsp.access_r || rsp.access_w || rsp.access_x;
+                    break;
+
+                case XENMEM_access_r:
+                    violation = rsp.access_w || rsp.access_x;
+                    break;
+
+                case XENMEM_access_w:
+                    violation = rsp.access_r || rsp.access_x;
+                    break;
+
+                case XENMEM_access_x:
+                    violation = rsp.access_r || rsp.access_w;
+                    break;
+
+                case XENMEM_access_rx:
+                case XENMEM_access_rx2rw:
+                    violation = rsp.access_w;
+                    break;
+
+                case XENMEM_access_wx:
+                    violation = rsp.access_r;
+                    break;
+
+                case XENMEM_access_rw:
+                    violation = rsp.access_x;
+                    break;
+
+                case XENMEM_access_rwx:
+                    violation = 0;
+                    break;
+                }
+            }
+
+            if ( violation )
+                v->arch.mem_event.emulate_flags = rsp.flags;
+        }
+
         /* Unpause domain */
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
             mem_event_vcpu_unpause(v);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 83329ed..440aa81 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -458,6 +458,15 @@ struct arch_vcpu
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
+
+    /* Should we emulate the next matching instruction on VCPU resume
+     * after a mem_event? */
+    struct {
+        uint32_t emulate_flags;
+        unsigned long gpa;
+        unsigned long eip;
+    } mem_event;
+
 } __cacheline_aligned;
 
 smap_check_policy_t smap_policy_change(struct vcpu *v,
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 069ba39..8f0292c 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -207,6 +207,8 @@ struct hvm_function_table {
                                   uint32_t *ecx, uint32_t *edx);
 
     void (*enable_msr_exit_interception)(struct domain *d);
+
+    bool_t (*exited_by_nested_pagefault)(void);
 };
 
 extern struct hvm_function_table hvm_funcs;
diff --git a/xen/include/public/mem_event.h b/xen/include/public/mem_event.h
index bbd1636..49f1b84 100644
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -31,11 +31,13 @@
 #include "io/ring.h"
 
 /* Memory event flags */
-#define MEM_EVENT_FLAG_VCPU_PAUSED  (1 << 0)
-#define MEM_EVENT_FLAG_DROP_PAGE    (1 << 1)
-#define MEM_EVENT_FLAG_EVICT_FAIL   (1 << 2)
-#define MEM_EVENT_FLAG_FOREIGN      (1 << 3)
-#define MEM_EVENT_FLAG_DUMMY        (1 << 4)
+#define MEM_EVENT_FLAG_VCPU_PAUSED     (1 << 0)
+#define MEM_EVENT_FLAG_DROP_PAGE       (1 << 1)
+#define MEM_EVENT_FLAG_EVICT_FAIL      (1 << 2)
+#define MEM_EVENT_FLAG_FOREIGN         (1 << 3)
+#define MEM_EVENT_FLAG_DUMMY           (1 << 4)
+#define MEM_EVENT_FLAG_EMULATE         (1 << 5)
+#define MEM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6)
 
 /* Reasons for the memory event request */
 #define MEM_EVENT_REASON_UNKNOWN     0    /* typical reason */
-- 
1.7.9.5

* Re: [PATCH RFC V6 1/5] xen: Emulate with no writes
From: Jan Beulich @ 2014-08-12 14:57 UTC
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
> +static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
> +    .read          = hvmemul_read,
> +    .insn_fetch    = hvmemul_insn_fetch,
> +    .write         = hvmemul_write_discard,
> +    .cmpxchg       = hvmemul_cmpxchg_discard,
> +    .rep_ins       = hvmemul_rep_ins_discard,
> +    .rep_outs      = hvmemul_rep_outs_discard,
> +    .rep_movs      = hvmemul_rep_movs_discard,
> +    .read_segment  = hvmemul_read_segment,
> +    .write_segment = hvmemul_write_segment,
> +    .read_io       = hvmemul_read_io_discard,
> +    .write_io      = hvmemul_write_io_discard,
> +    .read_cr       = hvmemul_read_cr,
> +    .write_cr      = hvmemul_write_cr,
> +    .read_msr      = hvmemul_read_msr,
> +    .write_msr     = hvmemul_write_msr,
> +    .wbinvd        = hvmemul_wbinvd,

How about these last two?

> +void hvm_emulate_one_full(bool_t nowrite, unsigned int trapnr,
> +    unsigned int errcode)
> +{
> +    struct hvm_emulate_ctxt ctx = {{ 0 }};
> +    int rc;
> +
> +    hvm_emulate_prepare(&ctx, guest_cpu_user_regs());
> +
> +    if ( nowrite )
> +        rc = hvm_emulate_one_no_write(&ctx);
> +    else
> +        rc = hvm_emulate_one(&ctx);
> +
> +    switch ( rc )
> +    {
> +    case X86EMUL_UNHANDLEABLE:
> +        gdprintk(XENLOG_DEBUG, "Emulation failed @ %04x:%lx: "
> +               "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
> +               hvmemul_get_seg_reg(x86_seg_cs, &ctx)->sel,
> +               ctx.insn_buf_eip,
> +               ctx.insn_buf[0], ctx.insn_buf[1],
> +               ctx.insn_buf[2], ctx.insn_buf[3],
> +               ctx.insn_buf[4], ctx.insn_buf[5],
> +               ctx.insn_buf[6], ctx.insn_buf[7],
> +               ctx.insn_buf[8], ctx.insn_buf[9]);
> +        hvm_inject_hw_exception(trapnr, errcode);
> +        break;
> +    case X86EMUL_EXCEPTION:
> +        if ( ctx.exn_pending )
> +            hvm_inject_hw_exception(ctx.exn_vector, ctx.exn_error_code);
> +        /* fall through */
> +    default:
> +        hvm_emulate_writeback(&ctx);

Shouldn't this be pulled out of the switch to also cover the exception
injection in the X86EMUL_UNHANDLEABLE case?

Jan

> +        break;
> +    }
> +}

* Re: [PATCH RFC V6 2/5] xen: Optimize introspection access to guest state
From: Jan Beulich @ 2014-08-12 15:01 UTC
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
> Speed optimization for introspection purposes: a handful of registers
> are sent along with each mem_event. This requires enlargement of the
> mem_event_request / mem_event_response stuctures, and additional code
> to fill in relevant values. Since the EPT event processing code needs
> more data than CR3 or MSR event processors, hvm_mem_event_fill_regs()
> fills in less data than p2m_mem_event_fill_regs(), in order to avoid
> overhead. Struct hvm_hw_cpu has been considered instead of the custom
> struct mem_event_regs_st, but its size would cause quick filling up
> of the mem_event ring buffer.
> 
> Changes since V1:
>  - Replaced guest_x86_mode with cs_arbytes in the mem_event_regs_st
>    structure.
>  - Removed superfluous preprocessor check for __x86_64__ in
>    p2m_mem_event_fill_regs().
> 
> Changes since V2:
>  - Removed inline function compiler suggestions.
>  - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
>  - Padded struct mem_event_regs_st.
>  - Renamed mem_event_regs_t to mem_event_regs.
>  - Readability changes.
> 
> Changes since V3:
>  - Changed the name of VCPU variables referring to the current
>    VCPU to "curr".
>  - Renamed "mem_event_regs" to "x86_mem_event_regs" to make it
>    explicit.
> 
> Changes since V4:
>  - Renamed "x86_mem_event_regs" to "mem_event_regs_x86" and
>    removed a typedef.
> 
> Changes since V5:
>  - Const-correctness.
> 
> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

Acked-by: Jan Beulich <jbeulich@suse.com>

* Re: [PATCH RFC V6 3/5] xen, libxc: Force-enable relevant MSR events
From: Jan Beulich @ 2014-08-12 15:06 UTC
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -39,6 +39,7 @@
>  #include <xen/keyhandler.h>
>  #include <asm/shadow.h>
>  #include <asm/tboot.h>
> +#include <asm/mem_event.h>
>  
>  static bool_t __read_mostly opt_vpid_enabled = 1;
>  boolean_param("vpid", opt_vpid_enabled);
> @@ -695,11 +696,31 @@ static void vmx_set_host_env(struct vcpu *v)
>  void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
>  {
>      unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
> +    struct domain *d = v->domain;
>  
>      /* VMX MSR bitmap supported? */
>      if ( msr_bitmap == NULL )
>          return;
>  
> +    if ( mem_event_check_ring(&d->mem_event->access) &&
> +         d->arch.hvm_domain.introspection_enabled )
> +    {
> +        /* Filter out MSR-s needed for memory introspection */
> +        switch ( msr )
> +        {
> +        case MSR_IA32_SYSENTER_EIP:
> +        case MSR_IA32_SYSENTER_ESP:
> +        case MSR_IA32_SYSENTER_CS:
> +        case MSR_IA32_MC0_CTL:
> +        case MSR_STAR:
> +        case MSR_LSTAR:
> +            return;

So you're adding an array further down, but just to use it there? My
main point in asking for something like that was to have a _single_
place where all the relevant MSRs get enumerated.

> +
> +        default:
> +            break;

This is pointless.

> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1682,6 +1682,23 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
>          *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
>  }
>  
> +static void vmx_enable_msr_exit_interception(struct domain *d)
> +{
> +    struct vcpu *v;
> +    const u32 msrs[] = { MSR_IA32_SYSENTER_EIP,
> +                         MSR_IA32_SYSENTER_ESP,
> +                         MSR_IA32_SYSENTER_CS,
> +                         MSR_IA32_MC0_CTL,
> +                         MSR_STAR,
> +                         MSR_LSTAR };

static

> +    int i;

unsigned

Jan

* Re: [PATCH RFC V6 1/5] xen: Emulate with no writes
From: Razvan Cojocaru @ 2014-08-12 15:08 UTC
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/12/2014 05:57 PM, Jan Beulich wrote:
>>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
>> +static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
>> +    .read          = hvmemul_read,
>> +    .insn_fetch    = hvmemul_insn_fetch,
>> +    .write         = hvmemul_write_discard,
>> +    .cmpxchg       = hvmemul_cmpxchg_discard,
>> +    .rep_ins       = hvmemul_rep_ins_discard,
>> +    .rep_outs      = hvmemul_rep_outs_discard,
>> +    .rep_movs      = hvmemul_rep_movs_discard,
>> +    .read_segment  = hvmemul_read_segment,
>> +    .write_segment = hvmemul_write_segment,
>> +    .read_io       = hvmemul_read_io_discard,
>> +    .write_io      = hvmemul_write_io_discard,
>> +    .read_cr       = hvmemul_read_cr,
>> +    .write_cr      = hvmemul_write_cr,
>> +    .read_msr      = hvmemul_read_msr,
>> +    .write_msr     = hvmemul_write_msr,
>> +    .wbinvd        = hvmemul_wbinvd,
> 
> How about these last two?

It would likely be safer to provide discard versions of those as well;
thank you for pointing that out.
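
Mirroring the existing discard handlers, those could look like the
sketch below (the parameter lists are assumed to follow the
corresponding x86_emulate_ops hooks and should be checked against
x86_emulate.h), with .write_msr and .wbinvd in hvm_emulate_ops_no_write
pointed at them instead:

static int hvmemul_write_msr_discard(
    unsigned long reg,
    uint64_t val,
    struct x86_emulate_ctxt *ctxt)
{
    /* Discard the MSR write. */
    return X86EMUL_OKAY;
}

static int hvmemul_wbinvd_discard(
    struct x86_emulate_ctxt *ctxt)
{
    /* Skip the cache flush. */
    return X86EMUL_OKAY;
}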

>> +void hvm_emulate_one_full(bool_t nowrite, unsigned int trapnr,
>> +    unsigned int errcode)
>> +{
>> +    struct hvm_emulate_ctxt ctx = {{ 0 }};
>> +    int rc;
>> +
>> +    hvm_emulate_prepare(&ctx, guest_cpu_user_regs());
>> +
>> +    if ( nowrite )
>> +        rc = hvm_emulate_one_no_write(&ctx);
>> +    else
>> +        rc = hvm_emulate_one(&ctx);
>> +
>> +    switch ( rc )
>> +    {
>> +    case X86EMUL_UNHANDLEABLE:
>> +        gdprintk(XENLOG_DEBUG, "Emulation failed @ %04x:%lx: "
>> +               "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
>> +               hvmemul_get_seg_reg(x86_seg_cs, &ctx)->sel,
>> +               ctx.insn_buf_eip,
>> +               ctx.insn_buf[0], ctx.insn_buf[1],
>> +               ctx.insn_buf[2], ctx.insn_buf[3],
>> +               ctx.insn_buf[4], ctx.insn_buf[5],
>> +               ctx.insn_buf[6], ctx.insn_buf[7],
>> +               ctx.insn_buf[8], ctx.insn_buf[9]);
>> +        hvm_inject_hw_exception(trapnr, errcode);
>> +        break;
>> +    case X86EMUL_EXCEPTION:
>> +        if ( ctx.exn_pending )
>> +            hvm_inject_hw_exception(ctx.exn_vector, ctx.exn_error_code);
>> +        /* fall through */
>> +    default:
>> +        hvm_emulate_writeback(&ctx);
> 
> Shouldn't this be pulled out of the switch to also cover the exception
> injection in the X86EMUL_UNHANDLEABLE case?

I'm not sure; that's how it's handled in xen/arch/x86/hvm/io.c
(handle_mmio()):

int handle_mmio(void)
{
    struct hvm_emulate_ctxt ctxt;
    struct vcpu *curr = current;
    struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
    int rc;

    ASSERT(!is_pvh_vcpu(curr));

    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());

    rc = hvm_emulate_one(&ctxt);

    if ( rc != X86EMUL_RETRY )
        vio->io_state = HVMIO_none;
    if ( vio->io_state == HVMIO_awaiting_completion )
        vio->io_state = HVMIO_handle_mmio_awaiting_completion;
    else
        vio->mmio_gva = 0;

    switch ( rc )
    {
    case X86EMUL_UNHANDLEABLE:
        gdprintk(XENLOG_WARNING,
                 "MMIO emulation failed @ %04x:%lx: "
                 "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
                 hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel,
                 ctxt.insn_buf_eip,
                 ctxt.insn_buf[0], ctxt.insn_buf[1],
                 ctxt.insn_buf[2], ctxt.insn_buf[3],
                 ctxt.insn_buf[4], ctxt.insn_buf[5],
                 ctxt.insn_buf[6], ctxt.insn_buf[7],
                 ctxt.insn_buf[8], ctxt.insn_buf[9]);
        return 0;
    case X86EMUL_EXCEPTION:
        if ( ctxt.exn_pending )
            hvm_inject_hw_exception(ctxt.exn_vector, ctxt.exn_error_code);
        break;
    default:
        break;
    }

    hvm_emulate_writeback(&ctxt);

    return 1;
}

There's a return there in the X86EMUL_UNHANDLEABLE case, so
hvm_emulate_writeback(&ctxt) doesn't get called.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 3/5] xen, libxc: Force-enable relevant MSR events
  2014-08-12 15:06   ` Jan Beulich
@ 2014-08-12 15:09     ` Razvan Cojocaru
  0 siblings, 0 replies; 17+ messages in thread
From: Razvan Cojocaru @ 2014-08-12 15:09 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/12/2014 06:06 PM, Jan Beulich wrote:
>>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>> @@ -39,6 +39,7 @@
>>  #include <xen/keyhandler.h>
>>  #include <asm/shadow.h>
>>  #include <asm/tboot.h>
>> +#include <asm/mem_event.h>
>>  
>>  static bool_t __read_mostly opt_vpid_enabled = 1;
>>  boolean_param("vpid", opt_vpid_enabled);
>> @@ -695,11 +696,31 @@ static void vmx_set_host_env(struct vcpu *v)
>>  void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
>>  {
>>      unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
>> +    struct domain *d = v->domain;
>>  
>>      /* VMX MSR bitmap supported? */
>>      if ( msr_bitmap == NULL )
>>          return;
>>  
>> +    if ( mem_event_check_ring(&d->mem_event->access) &&
>> +         d->arch.hvm_domain.introspection_enabled )
>> +    {
>> +        /* Filter out MSR-s needed for memory introspection */
>> +        switch ( msr )
>> +        {
>> +        case MSR_IA32_SYSENTER_EIP:
>> +        case MSR_IA32_SYSENTER_ESP:
>> +        case MSR_IA32_SYSENTER_CS:
>> +        case MSR_IA32_MC0_CTL:
>> +        case MSR_STAR:
>> +        case MSR_LSTAR:
>> +            return;
> 
> So you're adding an array further down, but just to use it there? My
> main point in asking for something like that was to have a _single_
> place where all the relevant MSRs get enumerated.

You're right; I'll change the code (and modify it to address the rest of
your comments as well).


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-11 15:08 ` [PATCH RFC V6 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
@ 2014-08-12 15:16   ` Jan Beulich
  2014-08-12 15:42     ` Razvan Cojocaru
  0 siblings, 1 reply; 17+ messages in thread
From: Jan Beulich @ 2014-08-12 15:16 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
> +static bool_t vmx_check_pf_injection(void)
> +{
> +    struct vcpu *curr = current;
> +    const struct domain *currd = curr->domain;
> +    struct segment_register seg;
> +    unsigned long ev;
> +    uint32_t pending_event = 0;
> +    uint64_t mask;
> +
> +    if ( !is_hvm_domain(currd) ||
> +         likely(!currd->arch.hvm_domain.fault_info.valid) )
> +        return 0;
> +
> +    vmx_get_segment_register(curr, x86_seg_ss, &seg);
> +
> +    if ( seg.attr.fields.dpl != 3 ) /* Guest is not in user mode */
> +        return 0;
> +
> +    if ( hvm_long_mode_enabled(curr) )
> +        mask = 0x000ffffffffff000; /* Bits 51:12. */

PADDR_MASK & PAGE_MASK

> +    else if ( hvm_pae_enabled(curr) )
> +        mask = 0x00000000ffffffe0; /* Bits 31:5. */
> +    else
> +        mask = 0x00000000fffff000; /* Bits 31:12. */

And this would probably also be better as (uint32_t)PAGE_MASK.
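
Putting the two suggestions together, the mask selection could read as
follows (a sketch; the PAE case keeps a literal, as no ready-made macro
covers bits 31:5):

    if ( hvm_long_mode_enabled(curr) )
        mask = PADDR_MASK & PAGE_MASK;       /* Bits 51:12. */
    else if ( hvm_pae_enabled(curr) )
        mask = 0x00000000ffffffe0UL;         /* Bits 31:5. */
    else
        mask = (uint32_t)PAGE_MASK;          /* Bits 31:12. */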

> +
> +    if ( (curr->arch.hvm_vcpu.guest_cr[3] & mask) !=
> +         (currd->arch.hvm_domain.fault_info.address_space & mask) )
> +        return 0;
> +
> +    vmx_vmcs_enter(curr);
> +    __vmread(VM_ENTRY_INTR_INFO, &ev);
> +
> +    if ( (ev & INTR_INFO_VALID_MASK) &&
> +         hvm_event_needs_reinjection(MASK_EXTR(ev, INTR_INFO_INTR_TYPE_MASK),
> +                                     ev & INTR_INFO_VECTOR_MASK) )
> +        pending_event = ev;
> +
> +    vmx_vmcs_exit(curr);

Again - is there a reason not to move this right after the __vmread()
above?

> +    return pending_event == 0;
> +}
> +
> +static void vmx_inject_pf(void)
> +{
> +    struct vcpu *curr = current;
> +    struct domain *currd = curr->domain;
> +    int errcode = PFEC_user_mode;

unsigned

> +    uint64_t virtual_address = currd->arch.hvm_domain.fault_info.virtual_address;
> +
> +    currd->arch.hvm_domain.fault_info.valid = 0;
> +
> +    if ( currd->arch.hvm_domain.fault_info.write_access )
> +        errcode |= PFEC_write_access;

Isn't this a pretty limited set of error codes you're able to generate
here?

> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -141,6 +141,14 @@ struct hvm_domain {
>       */
>      uint64_t sync_tsc;
>  
> +    /* Memory introspection page fault injection data. */
> +    struct {
> +        uint64_t address_space;
> +        uint64_t virtual_address;
> +        uint32_t write_access;

Is this meaningfully a 32-bit field (rather than just a boolean one)?

> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -937,6 +937,19 @@ typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
>  #endif
>  
> +/* XEN_DOMCTL_set_pagefault_info requests that a page fault occur at
> + * the next VMENTRY.
> + */

Coding style.

> +struct xen_domctl_set_pagefault_info {
> +    uint64_t address_space;
> +    uint64_t virtual_address;
> +    uint32_t write_access;

I guess you want this to be a fixed size one in the public interface,
but if only zero/non-zero counts, this could (a) be a uint8_t and
(b) get evaluated using the canonical !! when converting to the
internal structure.
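
A sketch of what that could look like (the padding field and the exact
copy site are assumptions, not part of the posted patch):

struct xen_domctl_set_pagefault_info {
    uint64_t address_space;
    uint64_t virtual_address;
    uint8_t write_access;    /* only zero vs. non-zero matters */
    uint8_t pad[7];          /* keep the public layout explicitly sized */
};

/* When copying into the internal structure: */
d->arch.hvm_domain.fault_info.write_access =
    !!op->u.set_pagefault_info.write_access;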

Jan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 5/5] xen: Handle resumed instruction based on previous mem_event reply
  2014-08-11 15:08 ` [PATCH RFC V6 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
@ 2014-08-12 15:20   ` Jan Beulich
  2014-08-12 15:43     ` Razvan Cojocaru
  0 siblings, 1 reply; 17+ messages in thread
From: Jan Beulich @ 2014-08-12 15:20 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1699,6 +1699,18 @@ static void vmx_enable_msr_exit_interception(struct domain *d)
>              vmx_enable_intercept_for_msr(v, msrs[i], MSR_TYPE_W);
>  }
>  
> +static bool_t vmx_exited_by_nested_pagefault(void)
> +{
> +    unsigned long exit_qualification;
> +
> +    __vmread(EXIT_QUALIFICATION, &exit_qualification);
> +
> +    if ( (exit_qualification & EPT_GLA_FAULT) == 0 )
> +        return 0;
> +
> +    return 1;
> +}

As (I think) was already said on the previous version, this would
apparently benefit from being based on Tamas's recent work (slated to
go in once that series is suitably acked).

Jan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 1/5] xen: Emulate with no writes
  2014-08-12 15:08   ` Razvan Cojocaru
@ 2014-08-12 15:27     ` Jan Beulich
  2014-08-12 15:49       ` Razvan Cojocaru
  0 siblings, 1 reply; 17+ messages in thread
From: Jan Beulich @ 2014-08-12 15:27 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 12.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
> There's a return there in the X86EMUL_UNHANDLEABLE case, so
> hvm_emulate_writeback(&ctxt) doesn't get called.

Right, but unlike in your case, there's no exception being injected
there.

Jan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-12 15:16   ` Jan Beulich
@ 2014-08-12 15:42     ` Razvan Cojocaru
  2014-08-12 16:03       ` Jan Beulich
  0 siblings, 1 reply; 17+ messages in thread
From: Razvan Cojocaru @ 2014-08-12 15:42 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/12/2014 06:16 PM, Jan Beulich wrote:
>>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
>> +    if ( (curr->arch.hvm_vcpu.guest_cr[3] & mask) !=
>> +         (currd->arch.hvm_domain.fault_info.address_space & mask) )
>> +        return 0;
>> +
>> +    vmx_vmcs_enter(curr);
>> +    __vmread(VM_ENTRY_INTR_INFO, &ev);
>> +
>> +    if ( (ev & INTR_INFO_VALID_MASK) &&
>> +         hvm_event_needs_reinjection(MASK_EXTR(ev, INTR_INFO_INTR_TYPE_MASK),
>> +                                     ev & INTR_INFO_VECTOR_MASK) )
>> +        pending_event = ev;
>> +
>> +    vmx_vmcs_exit(curr);
> 
> Again - is there a reason not to move this right after the __vmread()
> above?

No, sorry, I missed that. No reason; I'll move it.
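
The reordered section would then presumably look like:

    vmx_vmcs_enter(curr);
    __vmread(VM_ENTRY_INTR_INFO, &ev);
    vmx_vmcs_exit(curr);    /* nothing below needs the VMCS any more */

    if ( (ev & INTR_INFO_VALID_MASK) &&
         hvm_event_needs_reinjection(MASK_EXTR(ev, INTR_INFO_INTR_TYPE_MASK),
                                     ev & INTR_INFO_VECTOR_MASK) )
        pending_event = ev;

    return pending_event == 0;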

>> +    uint64_t virtual_address = currd->arch.hvm_domain.fault_info.virtual_address;
>> +
>> +    currd->arch.hvm_domain.fault_info.valid = 0;
>> +
>> +    if ( currd->arch.hvm_domain.fault_info.write_access )
>> +        errcode |= PFEC_write_access;
> 
> Isn't this pretty limited a set of error codes you're able to generate
> here?

For the purpose of simply bringing in pages that the guest OS has
swapped out, so that they can be inspected, it's been enough.

I've thought about making this a parameter to the new libxc function,
but it seems that the PFEC_... codes didn't make it into the public Xen
headers.

>> --- a/xen/include/asm-x86/hvm/domain.h
>> +++ b/xen/include/asm-x86/hvm/domain.h
>> @@ -141,6 +141,14 @@ struct hvm_domain {
>>       */
>>      uint64_t sync_tsc;
>>  
>> +    /* Memory introspection page fault injection data. */
>> +    struct {
>> +        uint64_t address_space;
>> +        uint64_t virtual_address;
>> +        uint32_t write_access;
> 
> Is this meaningfully a 32-bit field (rather than just a boolean one)?

No, it's just a boolean one. I'll modify the code to properly reflect that.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 5/5] xen: Handle resumed instruction based on previous mem_event reply
  2014-08-12 15:20   ` Jan Beulich
@ 2014-08-12 15:43     ` Razvan Cojocaru
  0 siblings, 0 replies; 17+ messages in thread
From: Razvan Cojocaru @ 2014-08-12 15:43 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/12/2014 06:20 PM, Jan Beulich wrote:
>>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -1699,6 +1699,18 @@ static void vmx_enable_msr_exit_interception(struct domain *d)
>>              vmx_enable_intercept_for_msr(v, msrs[i], MSR_TYPE_W);
>>  }
>>  
>> +static bool_t vmx_exited_by_nested_pagefault(void)
>> +{
>> +    unsigned long exit_qualification;
>> +
>> +    __vmread(EXIT_QUALIFICATION, &exit_qualification);
>> +
>> +    if ( (exit_qualification & EPT_GLA_FAULT) == 0 )
>> +        return 0;
>> +
>> +    return 1;
>> +}
> 
> As (I think) said on the previous version already - this would apparently
> benefit from being based on Tamas's recent work (slated to go in once
> the series got suitably acked).

I agree; that would be great. In the meantime, I just thought that the
patches should be self-sufficient.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 1/5] xen: Emulate with no writes
  2014-08-12 15:27     ` Jan Beulich
@ 2014-08-12 15:49       ` Razvan Cojocaru
  0 siblings, 0 replies; 17+ messages in thread
From: Razvan Cojocaru @ 2014-08-12 15:49 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/12/2014 06:27 PM, Jan Beulich wrote:
>>>> On 12.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
>> There's a return there in the X86EMUL_UNHANDLEABLE case, so
>> hvm_emulate_writeback(&ctxt) doesn't get called.
> 
> Right, but other than in you case there's no exception being injected
> there.

Of course, thanks for catching that! I'll fix that too.
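
Concretely, pulling the writeback out of the switch in
hvm_emulate_one_full() would give something like this sketch:

    switch ( rc )
    {
    case X86EMUL_UNHANDLEABLE:
        /* gdprintk(XENLOG_DEBUG, ...) as before */
        hvm_inject_hw_exception(trapnr, errcode);
        break;
    case X86EMUL_EXCEPTION:
        if ( ctx.exn_pending )
            hvm_inject_hw_exception(ctx.exn_vector, ctx.exn_error_code);
        break;
    default:
        break;
    }

    /* Unconditional, so the X86EMUL_UNHANDLEABLE path is covered too. */
    hvm_emulate_writeback(&ctx);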


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH RFC V6 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-12 15:42     ` Razvan Cojocaru
@ 2014-08-12 16:03       ` Jan Beulich
  0 siblings, 0 replies; 17+ messages in thread
From: Jan Beulich @ 2014-08-12 16:03 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 12.08.14 at 17:42, <rcojocaru@bitdefender.com> wrote:
> On 08/12/2014 06:16 PM, Jan Beulich wrote:
>>>>> On 11.08.14 at 17:08, <rcojocaru@bitdefender.com> wrote:
>>> +    uint64_t virtual_address = currd->arch.hvm_domain.fault_info.virtual_address;
>>> +
>>> +    currd->arch.hvm_domain.fault_info.valid = 0;
>>> +
>>> +    if ( currd->arch.hvm_domain.fault_info.write_access )
>>> +        errcode |= PFEC_write_access;
>> 
>> Isn't this a pretty limited set of error codes you're able to generate
>> here?
> 
> For the purpose of simply bringing in pages that the guest OS has
> swapped out for inspection it's been enough.
> 
> I've thought about making this a parameter to the new libxc function,
> but it seems that the PFEC_... codes didn't make it into the public Xen
> headers.

But they're architectural values, so passing just numbers (and
referring to the SDM/PM) would seem fine to me. In which case
you probably wouldn't even need the write_access fields.
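
For instance, a hypothetical variant that takes the architectural error
code directly (the errcode field name is invented for illustration):

struct xen_domctl_set_pagefault_info {
    uint64_t address_space;
    uint64_t virtual_address;
    uint32_t errcode;    /* architectural #PF error code per the SDM/APM */
};

/* The injection side (in vmx_inject_pf()) then reduces to roughly: */
hvm_inject_page_fault(currd->arch.hvm_domain.fault_info.errcode,
                      currd->arch.hvm_domain.fault_info.virtual_address);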

Jan

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2014-08-12 16:03 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-11 15:08 [PATCH RFC V6 1/5] xen: Emulate with no writes Razvan Cojocaru
2014-08-11 15:08 ` [PATCH RFC V6 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
2014-08-12 15:01   ` Jan Beulich
2014-08-11 15:08 ` [PATCH RFC V6 3/5] xen, libxc: Force-enable relevant MSR events Razvan Cojocaru
2014-08-12 15:06   ` Jan Beulich
2014-08-12 15:09     ` Razvan Cojocaru
2014-08-11 15:08 ` [PATCH RFC V6 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
2014-08-12 15:16   ` Jan Beulich
2014-08-12 15:42     ` Razvan Cojocaru
2014-08-12 16:03       ` Jan Beulich
2014-08-11 15:08 ` [PATCH RFC V6 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
2014-08-12 15:20   ` Jan Beulich
2014-08-12 15:43     ` Razvan Cojocaru
2014-08-12 14:57 ` [PATCH RFC V6 1/5] xen: Emulate with no writes Jan Beulich
2014-08-12 15:08   ` Razvan Cojocaru
2014-08-12 15:27     ` Jan Beulich
2014-08-12 15:49       ` Razvan Cojocaru
