* [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
@ 2019-06-04 11:49 Alexandru Stefan ISAILA
  2019-06-11 12:45 ` Paul Durrant
  2019-06-12  9:27 ` Alexandru Stefan ISAILA
  0 siblings, 2 replies; 13+ messages in thread
From: Alexandru Stefan ISAILA @ 2019-06-04 11:49 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, tamas, suravee.suthikulpanit, wl, jun.nakajima,
	andrew.cooper3, rcojocaru, tim, george.dunlap, paul.durrant,
	jbeulich, Alexandru Stefan ISAILA, boris.ostrovsky, brian.woods,
	roger.pau

This patch aims to have mem access vm events sent from the emulator.
This is useful when we want to emulate only a page walk without
checking the EPT, but still want to check the EPT when emulating
the instruction that caused the page walk. In this case, the original
EPT fault is raised by the walk trying to set the accessed or dirty
bits, but executing the instruction itself might also cause an EPT
fault if permitted to run, and this second fault should not be lost.

We use hvmemul_map_linear_addr() to intercept r/w accesses and
__hvm_copy() to intercept exec accesses.

First we try to send a vm event; if the event is sent, emulation
returns X86EMUL_RETRY in order to stop emulating instructions that
touch access-protected pages. If the event is not sent, emulation
goes on as expected.
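
In other words, a caller that opts in sees a delivered event as a
retry. A minimal sketch (assuming only the interfaces introduced
below; the surrounding variables are those of linear_read()):

    pagefault_info_t pfinfo;
    int rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec,
                                        &pfinfo, /* send_event */ true);

    if ( rc == HVMTRANS_gfn_paged_out ) /* also returned when an event
                                           was sent */
        return X86EMUL_RETRY; /* stop; the monitor decides how to resume */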

Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>

---
Changes since V4:
	- Move the exec interception to __hvm_copy()
	- Remove the page-walk in hvm_emulate_send_vm_event() and get
the needed address from the existing page walk
	- Add send_event param to __hvm_copy() and
hvm_copy_from_guest_linear()
	- Drop X86EMUL_ACCESS_EXCEPTION and use X86EMUL_RETRY instead.
---
 xen/arch/x86/hvm/emulate.c        | 71 +++++++++++++++++++++++++++++--
 xen/arch/x86/hvm/hvm.c            | 27 +++++++-----
 xen/arch/x86/hvm/svm/svm.c        |  2 +-
 xen/arch/x86/hvm/vm_event.c       |  2 +-
 xen/arch/x86/hvm/vmx/vvmx.c       |  2 +-
 xen/arch/x86/mm/mem_access.c      |  3 +-
 xen/arch/x86/mm/shadow/common.c   |  4 +-
 xen/arch/x86/mm/shadow/hvm.c      |  2 +-
 xen/include/asm-x86/hvm/emulate.h |  9 +++-
 xen/include/asm-x86/hvm/support.h |  2 +-
 10 files changed, 101 insertions(+), 23 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 8659c89862..9b2d8c2014 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -12,9 +12,11 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
+#include <xen/monitor.h>
 #include <xen/paging.h>
 #include <xen/trace.h>
 #include <xen/vm_event.h>
+#include <asm/altp2m.h>
 #include <asm/event.h>
 #include <asm/i387.h>
 #include <asm/xstate.h>
@@ -530,6 +532,57 @@ static int hvmemul_do_mmio_addr(paddr_t mmio_gpa,
     return hvmemul_do_io_addr(1, mmio_gpa, reps, size, dir, df, ram_gpa);
 }
 
+bool hvm_emulate_send_vm_event(unsigned long gla, gfn_t gfn,
+                               uint32_t pfec, bool send_event)
+{
+    xenmem_access_t access;
+    vm_event_request_t req = {};
+    paddr_t gpa = ((gfn_x(gfn) << PAGE_SHIFT) | (gla & ~PAGE_MASK));
+
+    if ( !send_event || !pfec )
+        return false;
+
+    if ( p2m_get_mem_access(current->domain, gfn, &access,
+                            altp2m_vcpu_idx(current)) != 0 )
+        return false;
+
+    switch ( access ) {
+    case XENMEM_access_x:
+    case XENMEM_access_rx:
+        if ( pfec & PFEC_write_access )
+            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
+        break;
+
+    case XENMEM_access_w:
+    case XENMEM_access_rw:
+        if ( pfec & PFEC_insn_fetch )
+            req.u.mem_access.flags = MEM_ACCESS_X;
+        break;
+
+    case XENMEM_access_r:
+    case XENMEM_access_n:
+        if ( pfec & PFEC_write_access )
+            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
+        if ( pfec & PFEC_insn_fetch )
+            req.u.mem_access.flags |= MEM_ACCESS_X;
+        break;
+
+    default:
+        return false;
+    }
+
+    if ( !req.u.mem_access.flags )
+        return false; /* no violation */
+
+    req.reason = VM_EVENT_REASON_MEM_ACCESS;
+    req.u.mem_access.gfn = gfn_x(gfn);
+    req.u.mem_access.flags |= MEM_ACCESS_FAULT_WITH_GLA | MEM_ACCESS_GLA_VALID;
+    req.u.mem_access.gla = gla;
+    req.u.mem_access.offset = gpa & ((1 << PAGE_SHIFT) - 1);
+
+    return monitor_traps(current, true, &req) >= 0;
+}
+
 /*
  * Map the frame(s) covering an individual linear access, for writeable
  * access.  May return NULL for MMIO, or ERR_PTR(~X86EMUL_*) for other errors
@@ -547,6 +600,7 @@ static void *hvmemul_map_linear_addr(
     unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
         (linear >> PAGE_SHIFT) + 1;
     unsigned int i;
+    gfn_t gfn;
 
     /*
      * mfn points to the next free slot.  All used slots have a page reference
@@ -585,7 +639,7 @@ static void *hvmemul_map_linear_addr(
         ASSERT(mfn_x(*mfn) == 0);
 
         res = hvm_translate_get_page(curr, addr, true, pfec,
-                                     &pfinfo, &page, NULL, &p2mt);
+                                     &pfinfo, &page, &gfn, &p2mt);
 
         switch ( res )
         {
@@ -615,6 +669,13 @@ static void *hvmemul_map_linear_addr(
 
         if ( pfec & PFEC_write_access )
         {
+            if ( hvm_emulate_send_vm_event(addr, gfn, pfec,
+                                           hvmemul_ctxt->send_event) )
+            {
+                err = ERR_PTR(~X86EMUL_RETRY);
+                goto out;
+            }
+
             if ( p2m_is_discard_write(p2mt) )
             {
                 err = ERR_PTR(~X86EMUL_OKAY);
@@ -1115,7 +1176,8 @@ static int linear_read(unsigned long addr, unsigned int bytes, void *p_data,
      * clean up any interim state.
      */
     if ( !hvmemul_find_mmio_cache(vio, addr, IOREQ_READ, false) )
-        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo);
+        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo,
+                                        hvmemul_ctxt->send_event);
 
     switch ( rc )
     {
@@ -2509,12 +2571,13 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
 }
 
 void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
-    unsigned int errcode)
+    unsigned int errcode, bool send_event)
 {
     struct hvm_emulate_ctxt ctx = {{ 0 }};
     int rc;
 
     hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
+    ctx.send_event = send_event;
 
     switch ( kind )
     {
@@ -2629,7 +2692,7 @@ void hvm_emulate_init_per_insn(
              hvm_copy_from_guest_linear(hvmemul_ctxt->insn_buf, addr,
                                         sizeof(hvmemul_ctxt->insn_buf),
                                         pfec | PFEC_insn_fetch,
-                                        NULL) == HVMTRANS_okay) ?
+                                        NULL, false) == HVMTRANS_okay) ?
             sizeof(hvmemul_ctxt->insn_buf) : 0;
     }
     else
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 029eea3b85..f6df57b442 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2942,7 +2942,7 @@ void hvm_task_switch(
     }
 
     rc = hvm_copy_from_guest_linear(
-        &tss, prev_tr.base, sizeof(tss), PFEC_page_present, &pfinfo);
+        &tss, prev_tr.base, sizeof(tss), PFEC_page_present, &pfinfo, false);
     if ( rc == HVMTRANS_bad_linear_to_gfn )
         hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
     if ( rc != HVMTRANS_okay )
@@ -2989,7 +2989,7 @@ void hvm_task_switch(
         goto out;
 
     rc = hvm_copy_from_guest_linear(
-        &tss, tr.base, sizeof(tss), PFEC_page_present, &pfinfo);
+        &tss, tr.base, sizeof(tss), PFEC_page_present, &pfinfo, false);
     if ( rc == HVMTRANS_bad_linear_to_gfn )
         hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
     /*
@@ -3180,7 +3180,7 @@ enum hvm_translation_result hvm_translate_get_page(
 #define HVMCOPY_linear     (1u<<2)
 static enum hvm_translation_result __hvm_copy(
     void *buf, paddr_t addr, int size, struct vcpu *v, unsigned int flags,
-    uint32_t pfec, pagefault_info_t *pfinfo)
+    uint32_t pfec, pagefault_info_t *pfinfo, bool send_event)
 {
     gfn_t gfn;
     struct page_info *page;
@@ -3224,6 +3224,12 @@ static enum hvm_translation_result __hvm_copy(
             return HVMTRANS_bad_gfn_to_mfn;
         }
 
+        if ( hvm_emulate_send_vm_event(addr, gfn, pfec, send_event) )
+        {
+            put_page(page);
+            return HVMTRANS_gfn_paged_out;
+        }
+
         p = (char *)__map_domain_page(page) + (addr & ~PAGE_MASK);
 
         if ( flags & HVMCOPY_to_guest )
@@ -3267,14 +3273,14 @@ enum hvm_translation_result hvm_copy_to_guest_phys(
     paddr_t paddr, void *buf, int size, struct vcpu *v)
 {
     return __hvm_copy(buf, paddr, size, v,
-                      HVMCOPY_to_guest | HVMCOPY_phys, 0, NULL);
+                      HVMCOPY_to_guest | HVMCOPY_phys, 0, NULL, false);
 }
 
 enum hvm_translation_result hvm_copy_from_guest_phys(
     void *buf, paddr_t paddr, int size)
 {
     return __hvm_copy(buf, paddr, size, current,
-                      HVMCOPY_from_guest | HVMCOPY_phys, 0, NULL);
+                      HVMCOPY_from_guest | HVMCOPY_phys, 0, NULL, false);
 }
 
 enum hvm_translation_result hvm_copy_to_guest_linear(
@@ -3283,16 +3289,17 @@ enum hvm_translation_result hvm_copy_to_guest_linear(
 {
     return __hvm_copy(buf, addr, size, current,
                       HVMCOPY_to_guest | HVMCOPY_linear,
-                      PFEC_page_present | PFEC_write_access | pfec, pfinfo);
+                      PFEC_page_present | PFEC_write_access | pfec, pfinfo,
+                      false);
 }
 
 enum hvm_translation_result hvm_copy_from_guest_linear(
     void *buf, unsigned long addr, int size, uint32_t pfec,
-    pagefault_info_t *pfinfo)
+    pagefault_info_t *pfinfo, bool send_event)
 {
     return __hvm_copy(buf, addr, size, current,
                       HVMCOPY_from_guest | HVMCOPY_linear,
-                      PFEC_page_present | pfec, pfinfo);
+                      PFEC_page_present | pfec, pfinfo, send_event);
 }
 
 unsigned long copy_to_user_hvm(void *to, const void *from, unsigned int len)
@@ -3333,7 +3340,7 @@ unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
         return 0;
     }
 
-    rc = hvm_copy_from_guest_linear(to, (unsigned long)from, len, 0, NULL);
+    rc = hvm_copy_from_guest_linear(to, (unsigned long)from, len, 0, NULL, false);
     return rc ? len : 0; /* fake a copy_from_user() return code */
 }
 
@@ -3707,7 +3714,7 @@ void hvm_ud_intercept(struct cpu_user_regs *regs)
                                         sizeof(sig), hvm_access_insn_fetch,
                                         cs, &addr) &&
              (hvm_copy_from_guest_linear(sig, addr, sizeof(sig),
-                                         walk, NULL) == HVMTRANS_okay) &&
+                                         walk, NULL, false) == HVMTRANS_okay) &&
              (memcmp(sig, "\xf\xbxen", sizeof(sig)) == 0) )
         {
             regs->rip += sizeof(sig);
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index cd6a6b382b..d0d1d7e0a7 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1255,7 +1255,7 @@ static void svm_emul_swint_injection(struct x86_event *event)
         goto raise_exception;
 
     rc = hvm_copy_from_guest_linear(&idte, idte_linear_addr, idte_size,
-                                    PFEC_implicit, &pfinfo);
+                                    PFEC_implicit, &pfinfo, false);
     if ( rc )
     {
         if ( rc == HVMTRANS_bad_linear_to_gfn )
diff --git a/xen/arch/x86/hvm/vm_event.c b/xen/arch/x86/hvm/vm_event.c
index 121de23071..6d203e8db5 100644
--- a/xen/arch/x86/hvm/vm_event.c
+++ b/xen/arch/x86/hvm/vm_event.c
@@ -87,7 +87,7 @@ void hvm_vm_event_do_resume(struct vcpu *v)
             kind = EMUL_KIND_SET_CONTEXT_INSN;
 
         hvm_emulate_one_vm_event(kind, TRAP_invalid_op,
-                                 X86_EVENT_NO_EC);
+                                 X86_EVENT_NO_EC, false);
 
         v->arch.vm_event->emulate_flags = 0;
     }
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 7bca572d88..04be8b98b6 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -426,7 +426,7 @@ static int decode_vmx_inst(struct cpu_user_regs *regs,
         {
             pagefault_info_t pfinfo;
             int rc = hvm_copy_from_guest_linear(poperandS, base, size,
-                                                0, &pfinfo);
+                                                0, &pfinfo, false);
 
             if ( rc == HVMTRANS_bad_linear_to_gfn )
                 hvm_inject_page_fault(pfinfo.ec, pfinfo.linear);
diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index 0144f92b98..c9972bab8c 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -214,7 +214,8 @@ bool p2m_mem_access_check(paddr_t gpa, unsigned long gla,
          d->arch.monitor.inguest_pagefault_disabled &&
          npfec.kind != npfec_kind_with_gla ) /* don't send a mem_event */
     {
-        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op, X86_EVENT_NO_EC);
+        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
+                                 X86_EVENT_NO_EC, true);
 
         return true;
     }
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 795201dc82..2bb80accf0 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -166,7 +166,7 @@ const struct x86_emulate_ops *shadow_init_emulation(
             hvm_access_insn_fetch, sh_ctxt, &addr) &&
          !hvm_copy_from_guest_linear(
              sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf),
-             PFEC_insn_fetch, NULL))
+             PFEC_insn_fetch, NULL, false))
         ? sizeof(sh_ctxt->insn_buf) : 0;
 
     return &hvm_shadow_emulator_ops;
@@ -201,7 +201,7 @@ void shadow_continue_emulation(struct sh_emulate_ctxt *sh_ctxt,
                 hvm_access_insn_fetch, sh_ctxt, &addr) &&
              !hvm_copy_from_guest_linear(
                  sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf),
-                 PFEC_insn_fetch, NULL))
+                 PFEC_insn_fetch, NULL, false))
             ? sizeof(sh_ctxt->insn_buf) : 0;
         sh_ctxt->insn_buf_eip = regs->rip;
     }
diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c
index c6469c846c..3841d0ceb7 100644
--- a/xen/arch/x86/mm/shadow/hvm.c
+++ b/xen/arch/x86/mm/shadow/hvm.c
@@ -125,7 +125,7 @@ hvm_read(enum x86_segment seg,
     rc = hvm_copy_from_guest_linear(p_data, addr, bytes,
                                     (access_type == hvm_access_insn_fetch
                                      ? PFEC_insn_fetch : 0),
-                                    &pfinfo);
+                                    &pfinfo, false);
 
     switch ( rc )
     {
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index b39a1a0331..9bed0aa83e 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -47,6 +47,7 @@ struct hvm_emulate_ctxt {
     uint32_t intr_shadow;
 
     bool_t set_context;
+    bool send_event;
 };
 
 enum emul_kind {
@@ -63,7 +64,8 @@ int hvm_emulate_one(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 void hvm_emulate_one_vm_event(enum emul_kind kind,
     unsigned int trapnr,
-    unsigned int errcode);
+    unsigned int errcode,
+    bool send_event);
 /* Must be called once to set up hvmemul state. */
 void hvm_emulate_init_once(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
@@ -80,6 +82,11 @@ struct segment_register *hvmemul_get_seg_reg(
     enum x86_segment seg,
     struct hvm_emulate_ctxt *hvmemul_ctxt);
 int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla);
+bool hvm_emulate_send_vm_event(
+    unsigned long gla,
+    gfn_t gfn,
+    uint32_t pfec,
+    bool send_event);
 
 static inline bool handle_mmio(void)
 {
diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
index e989aa7349..914f388921 100644
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -99,7 +99,7 @@ enum hvm_translation_result hvm_copy_to_guest_linear(
     pagefault_info_t *pfinfo);
 enum hvm_translation_result hvm_copy_from_guest_linear(
     void *buf, unsigned long addr, int size, uint32_t pfec,
-    pagefault_info_t *pfinfo);
+    pagefault_info_t *pfinfo, bool send_event);
 
 /*
  * Get a reference on the page under an HVM physical or linear address.  If
-- 
2.17.1


* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-06-04 11:49 [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate Alexandru Stefan ISAILA
@ 2019-06-11 12:45 ` Paul Durrant
  2019-06-12  9:27 ` Alexandru Stefan ISAILA
  1 sibling, 0 replies; 13+ messages in thread
From: Paul Durrant @ 2019-06-11 12:45 UTC (permalink / raw)
  To: 'Alexandru Stefan ISAILA', xen-devel
  Cc: Kevin Tian, tamas, jbeulich, wl, jun.nakajima, Andrew Cooper,
	rcojocaru, George Dunlap, Tim (Xen.org),
	suravee.suthikulpanit, boris.ostrovsky, brian.woods,
	Roger Pau Monne

> -----Original Message-----
> From: Alexandru Stefan ISAILA [mailto:aisaila@bitdefender.com]
> Sent: 04 June 2019 12:50
> To: xen-devel@lists.xenproject.org
> Cc: Paul Durrant <Paul.Durrant@citrix.com>; jbeulich@suse.com; Andrew Cooper
> <Andrew.Cooper3@citrix.com>; wl@xen.org; Roger Pau Monne <roger.pau@citrix.com>;
> boris.ostrovsky@oracle.com; suravee.suthikulpanit@amd.com; brian.woods@amd.com;
> rcojocaru@bitdefender.com; tamas@tklengyel.com; jun.nakajima@intel.com; Kevin Tian
> <kevin.tian@intel.com>; George Dunlap <George.Dunlap@citrix.com>; Tim (Xen.org) <tim@xen.org>;
> Alexandru Stefan ISAILA <aisaila@bitdefender.com>
> Subject: [PATCH v5] x86/emulate: Send vm_event from emulate
> 
> This patch aims to have mem access vm events sent from the emulator.
> This is useful when we want to emulate only a page walk without
> checking the EPT, but still want to check the EPT when emulating
> the instruction that caused the page walk. In this case, the original
> EPT fault is raised by the walk trying to set the accessed or dirty
> bits, but executing the instruction itself might also cause an EPT
> fault if permitted to run, and this second fault should not be lost.
>
> We use hvmemul_map_linear_addr() to intercept r/w accesses and
> __hvm_copy() to intercept exec accesses.
>
> First we try to send a vm event; if the event is sent, emulation
> returns X86EMUL_RETRY in order to stop emulating instructions that
> touch access-protected pages. If the event is not sent, emulation
> goes on as expected.
> 
> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>

Emulation parts...

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

...with one nit, inline below...

> 
> [snip]
> +bool hvm_emulate_send_vm_event(unsigned long gla, gfn_t gfn,
> +                               uint32_t pfec, bool send_event)
> +{
> +    xenmem_access_t access;
> +    vm_event_request_t req = {};
> +    paddr_t gpa = ((gfn_x(gfn) << PAGE_SHIFT) | (gla & ~PAGE_MASK));
> +
> +    if ( !send_event || !pfec )
> +        return false;
> +
> +    if ( p2m_get_mem_access(current->domain, gfn, &access,
> +                            altp2m_vcpu_idx(current)) != 0 )
> +        return false;
> +
> +    switch ( access ) {
> +    case XENMEM_access_x:
> +    case XENMEM_access_rx:
> +        if ( pfec & PFEC_write_access )
> +            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
> +        break;
> +
> +    case XENMEM_access_w:
> +    case XENMEM_access_rw:
> +        if ( pfec & PFEC_insn_fetch )
> +            req.u.mem_access.flags = MEM_ACCESS_X;
> +        break;
> +
> +    case XENMEM_access_r:
> +    case XENMEM_access_n:
> +        if ( pfec & PFEC_write_access )
> +            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
> +        if ( pfec & PFEC_insn_fetch )
> +            req.u.mem_access.flags |= MEM_ACCESS_X;
> +        break;
> +
> +    default:
> +        return false;
> +    }
> +
> +    if ( !req.u.mem_access.flags )
> +        return false; /* no violation */
> +
> +    req.reason = VM_EVENT_REASON_MEM_ACCESS;
> +    req.u.mem_access.gfn = gfn_x(gfn);
> +    req.u.mem_access.flags |= MEM_ACCESS_FAULT_WITH_GLA | MEM_ACCESS_GLA_VALID;
> +    req.u.mem_access.gla = gla;
> +    req.u.mem_access.offset = gpa & ((1 << PAGE_SHIFT) - 1);

& ~PAGE_MASK?
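
i.e., a sketch of the suggested spelling, matching the gla & ~PAGE_MASK
used when computing gpa above (PAGE_MASK being ~(PAGE_SIZE - 1), the
two expressions are equivalent):

    req.u.mem_access.offset = gpa & ~PAGE_MASK;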

> +
> +    return monitor_traps(current, true, &req) >= 0;
> +}
> +

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-06-04 11:49 [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate Alexandru Stefan ISAILA
  2019-06-11 12:45 ` Paul Durrant
@ 2019-06-12  9:27 ` Alexandru Stefan ISAILA
  1 sibling, 0 replies; 13+ messages in thread
From: Alexandru Stefan ISAILA @ 2019-06-12  9:27 UTC (permalink / raw)
  To: xen-devel, wl, boris.ostrovsky, suravee.suthikulpanit,
	brian.woods, tamas, jun.nakajima, kevin.tian, george.dunlap, tim
  Cc: andrew.cooper3, paul.durrant, rcojocaru, jbeulich, roger.pau

Hi all,

Any remarks on the patch at hand are appreciated.

Thanks,
Alex

On 04.06.2019 14:49, Alexandru Stefan ISAILA wrote:
> [snip]

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-02  8:01           ` Jan Beulich
@ 2019-07-02  8:10             ` Alexandru Stefan ISAILA
  0 siblings, 0 replies; 13+ messages in thread
From: Alexandru Stefan ISAILA @ 2019-07-02  8:10 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Wei Liu, kevin.tian, tamas, jun.nakajima, rcojocaru,
	George Dunlap, Andrew Cooper, Tim Deegan, Paul Durrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne



On 02.07.2019 11:01, Jan Beulich wrote:
> On 02.07.2019 09:58, Alexandru Stefan ISAILA wrote:
>>
>>
>> On 01.07.2019 18:53, Jan Beulich wrote:
>>> On 01.07.2019 17:36, Alexandru Stefan ISAILA wrote:
>>>> On 01.07.2019 17:55, Jan Beulich wrote:
>>>>> On 01.07.2019 16:45, Alexandru Stefan ISAILA wrote:
>>>>>> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>>>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>>>>>>> +    if ( !req.u.mem_access.flags )
>>>>>>>> +        return false; /* no violation */
>>>>>>>
>>>>>>> How is the "false" here (I think this is the one the description talks
>>>>>>> about) matching up with the various other ones in the function?
>>>>>>
>>>>>> There should be no event if there is no access violation. So in this
>>>>>> case the emulation is continued as expected.
>>>>>
>>>>> But this doesn't answer my question: You use "false" as return value
>>>>> to indicate different things. Only the one here means "no access
>>>>> violation".
>>>>
>>>> Sorry about that, since this will remain the only return false apart
>>>> from the main one (return monitor_traps()): false = event was not sent
>>>> and true = event was sent.
>>>>
>>>> I understand that you are asking about the scenario when there was a
>>>> violation and the event was not sent. Then I can issue a domain_crash()
>>>> as that is potentially a big issue.
>>>>
>>>> I hope I got that correctly.
>>>
>>> I don't get the impression that you did. I count a total of four
>>> "return false" in the function, only one of which explicitly means
>>> "no access violation" (others may have that meaning implicitly). Let's
>>> take the p2m_get_mem_access() failure case as an example: What I don't
>>> understand is why this case and the "no access violation" one are both
>>> meant to be treated the same.
>>
>> Right, at the moment, false means that emulation should continue and
>> true means that emulation should stop. If it is a must that I return
>> different errors I will change that in the next version, but the way
>> the code is right now they are treated the same way.
> 
> Again - it's not a requirement. It depends on whether the behavior is
> intended to be that way. If it is, it should be clarified in the
> description or maybe better in a code comment. But to me, without such
> a clarification, it doesn't look like it should be that way.
> 

Short answer: yes, the behavior is intended to be the same and needs no
differentiation. I will clarify this in a code comment and in the commit
message; sorry for misunderstanding the first comment.
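
For instance, such a comment could read (a sketch only, not final
wording):

    /*
     * Return true if a vm_event was successfully sent, in which case
     * the caller must stop the emulation (X86EMUL_RETRY).  false means
     * emulation may continue: there was no violation, no event was
     * requested, or the mem-access lookup failed - all of which are
     * deliberately treated the same way.
     */
    bool hvm_emulate_send_vm_event(unsigned long gla, gfn_t gfn,
                                   uint32_t pfec, bool send_event);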

Regards,
Alex

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-02  7:58         ` Alexandru Stefan ISAILA
@ 2019-07-02  8:01           ` Jan Beulich
  2019-07-02  8:10             ` Alexandru Stefan ISAILA
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2019-07-02  8:01 UTC (permalink / raw)
  To: Alexandru Stefan ISAILA
  Cc: Wei Liu, kevin.tian, tamas, jun.nakajima, rcojocaru,
	George Dunlap, Andrew Cooper, Tim Deegan, Paul Durrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne

On 02.07.2019 09:58, Alexandru Stefan ISAILA wrote:
> 
> 
> On 01.07.2019 18:53, Jan Beulich wrote:
>> On 01.07.2019 17:36, Alexandru Stefan ISAILA wrote:
>>> On 01.07.2019 17:55, Jan Beulich wrote:
>>>> On 01.07.2019 16:45, Alexandru Stefan ISAILA wrote:
>>>>> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>>>>>> +    if ( !req.u.mem_access.flags )
>>>>>>> +        return false; /* no violation */
>>>>>>
>>>>>> How is the "false" here (I think this is the one the description talks
>>>>>> about) matching up with the various other ones in the function?
>>>>>
>>>>> There should be no event if there is no access violation. So in this
>>>>> case the emulation is continued as expected.
>>>>
>>>> But this doesn't answer my question: You use "false" as return value
>>>> to indicate different things. Only the one here means "no access
>>>> violation".
>>>
>>> Sorry about that, since this will remain the only return false apart
>>> from the main one (return monitor_traps()): false = event was not sent
>>> and true = event was sent.
>>>
>>> I understand that you are asking about the scenario when there was a
>>> violation and the event was not sent. Then I can issue a domain_crash()
>>> as that is potentially a big issue.
>>>
>>> I hope I got that correctly.
>>
>> I don't get the impression that you did. I count a total of four
>> "return false" in the function, only one of which explicitly means
>> "no access violation" (others may have that meaning implicitly). Let's
>> take the p2m_get_mem_access() failure case as an example: What I don't
>> understand is why this case and the "no access violation" one are both
>> meant to be treated the same.
> 
> Right, at the moment, false means that emulation should continue and
> true means that emulation should stop. If it is a must that I return
> different errors I will change that in the next version, but the way
> the code is right now they are treated the same way.

Again - it's not a requirement. It depends on whether the behavior is
intended to be that way. If it is, it should be clarified in the
description or maybe better in a code comment. But to me, without such
a clarification, it doesn't look like it should be that way.

Jan

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-01 15:53       ` Jan Beulich
@ 2019-07-02  7:58         ` Alexandru Stefan ISAILA
  2019-07-02  8:01           ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Alexandru Stefan ISAILA @ 2019-07-02  7:58 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Wei Liu, kevin.tian, tamas, jun.nakajima, rcojocaru,
	George Dunlap, Andrew Cooper, Tim Deegan, Paul Durrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne



On 01.07.2019 18:53, Jan Beulich wrote:
> On 01.07.2019 17:36, Alexandru Stefan ISAILA wrote:
>> On 01.07.2019 17:55, Jan Beulich wrote:
>>> On 01.07.2019 16:45, Alexandru Stefan ISAILA wrote:
>>>> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>>>>> +    if ( !req.u.mem_access.flags )
>>>>>> +        return false; /* no violation */
>>>>>
>>>>> How is the "false" here (I think this is the one the description talks
>>>>> about) matching up with the various other ones in the function?
>>>>
>>>> There should be no event if there is no access violation. So in this
>>>> case the emulation is continued as expected.
>>>
>>> But this doesn't answer my question: You use "false" as return value
>>> to indicate different things. Only the one here means "no access
>>> violation".
>>
>> Sorry about that, since this will remain the only return false apart
>> from the main one (return monitor_traps()): false = event was not sent
>> and true = event was sent.
>>
>> I understand that you are asking about the scenario when there was a
>> violation and the event was not sent. Then I can issue a domain_crash()
>> as that is potentially a big issue.
>>
>> I hope I got that correctly.
> 
> I don't get the impression that you did. I count a total of four
> "return false" in the function, only one of which explicitly means
> "no access violation" (others may have that meaning implicitly). Let's
> take the p2m_get_mem_access() failure case as an example: What I don't
> understand is why this case and the "no access violation" one are both
> meant to be treated the same.

Right, at the moment, false means that emulation should continue and
true means that emulation should stop. If it is a must that I return
different errors I will change that in the next version, but the way
the code is right now they are treated the same way.

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-01 15:36     ` Alexandru Stefan ISAILA
@ 2019-07-01 15:53       ` Jan Beulich
  2019-07-02  7:58         ` Alexandru Stefan ISAILA
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2019-07-01 15:53 UTC (permalink / raw)
  To: Alexandru Stefan ISAILA
  Cc: Wei Liu, kevin.tian, tamas, jun.nakajima, rcojocaru,
	George Dunlap, Andrew Cooper, Tim Deegan, Paul Durrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne

On 01.07.2019 17:36, Alexandru Stefan ISAILA wrote:
> On 01.07.2019 17:55, Jan Beulich wrote:
>> On 01.07.2019 16:45, Alexandru Stefan ISAILA wrote:
>>> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>>>> +    if ( !req.u.mem_access.flags )
>>>>> +        return false; /* no violation */
>>>>
>>>> How is the "false" here (I think this is the one the description talks
>>>> about) matching up with the various other ones in the function?
>>>
>>> There should be no event if there is no access violation. So in this
>>> case the emulation is continued as expected.
>>
>> But this doesn't answer my question: You use "false" as return value
>> to indicate different things. Only the one here means "no access
>> violation".
> 
> Sorry about that, since this will remain the only return false apart
> form the main one (return monitor_traps()), false  = event was not sent
> and true = event was sent.
> 
> I understand that you are asking about the scenario when there was a
> violation and the event was not sent. Then I can issue a domain_crash()
> as that is potentially a big issue.
> 
> I hope I got that correctly.

I don't get the impression that you did. I count a total of four
"return false" in the function, only one of which explicitly means
"no access violation" (others may have that meaning implicitly). Let's
take the p2m_get_mem_access() failure case as an example: What I don't
understand is why this case and the "no access violation" one are both
meant to be treated the same.

Jan

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-01 14:55   ` Jan Beulich
@ 2019-07-01 15:36     ` Alexandru Stefan ISAILA
  2019-07-01 15:53       ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Alexandru Stefan ISAILA @ 2019-07-01 15:36 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Wei Liu, kevin.tian, tamas, jun.nakajima, rcojocaru,
	George Dunlap, Andrew Cooper, Tim Deegan, Paul Durrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne



On 01.07.2019 17:55, Jan Beulich wrote:
> On 01.07.2019 16:45, Alexandru Stefan ISAILA wrote:
>> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>>> +    if ( !send_event || !pfec )
>>>> +        return false;
>>>
>>> I think I've said before that the !pfec part needs an explanation (in
>>> a comment). Without such an explanation I'm inclined to say it
>>> should be deleted. If otoh this is simply meant to be a shortcut,
>>> then you should really just check the two bits you actually care
>>> about further down.
>>
>> The pfec check is done because I encountered pfec 0 in tests. It could
>> save some work if pfec == 0 when calling the function. Is it a must
>> that this check be removed? If so then it can be done. The
>> send_event will be checked before calling the function as you said.
> 
> It's not a requirement for it to be removed, _if_ there's a good
> reason for it to be there. I don't, however, see how pfec=0 could
> be a problem - afaict it would return false a few lines further
> down in that case.

You are right, pfec=0 would not be a problem and it would be caught by
the "no violation" check.

> 
>>>> +    if ( !req.u.mem_access.flags )
>>>> +        return false; /* no violation */
>>>
>>> How is the "false" here (I think this is the one the description talks
>>> about) matching up with the various other ones in the function?
>>
>> There should be no event if there is no access violation. So in this
>> case the emulation is continued as expected.
> 
> But this doesn't answer my question: You use "false" as return value
> to indicate different things. Only the one here means "no access
> violation".

Sorry about that, since this will remain the only return false apart
from the main one (return monitor_traps()): false = event was not sent
and true = event was sent.

I understand that you are asking about the scenario when there was a 
violation and the event was not sent. Then I can issue a domain_crash() 
as that is potentially a big issue.

I hope I got that correctly.
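
(E.g., a sketch of what that could look like - the domain_crash() is my
suggestion here, not something the current patch does:

    if ( monitor_traps(current, true, &req) < 0 )
        /* Violation detected, but the event could not be delivered. */
        domain_crash(current->domain);
)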

> 
>>>> @@ -615,6 +669,13 @@ static void *hvmemul_map_linear_addr(
>>>>     
>>>>             if ( pfec & PFEC_write_access )
>>>>             {
>>>> +            if ( hvm_emulate_send_vm_event(addr, gfn, pfec,
>>>> +                                           hvmemul_ctxt->send_event) )
>>>> +            {
>>>> +                err = ERR_PTR(~X86EMUL_RETRY);
>>>> +                goto out;
>>>> +            }
>>>
>>> How come this sits only on the write path?
>>
>> We are interested only in the write access on this path. This also
>> ensures that pfec is set.
> 
> I'm sorry, but the event sending should not be tailored to what you
> need or want. Or if so (i.e. if agreed upon among the VM event
> maintainers) then this restriction should be clearly spelled out.
> 

On the other hand, this can go outside the write path with no effect on 
the functionality of this send_event feature.

I will move it after the if(write) in the next version.
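
Roughly, as a sketch of the planned change (not the final patch):

    if ( pfec & PFEC_write_access )
    {
        /* ... existing discard-write handling ... */
    }

    /* Now checked for any mapped access, not only writes. */
    if ( hvm_emulate_send_vm_event(addr, gfn, pfec,
                                   hvmemul_ctxt->send_event) )
    {
        err = ERR_PTR(~X86EMUL_RETRY);
        goto out;
    }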


Thanks,
Alex

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-01 15:13   ` Razvan Cojocaru
@ 2019-07-01 15:32     ` Jan Beulich
  0 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2019-07-01 15:32 UTC (permalink / raw)
  To: Razvan Cojocaru, Alexandru Stefan ISAILA
  Cc: kevin.tian, tamas, jun.nakajima, Wei Liu, George Dunlap,
	Andrew Cooper, Tim Deegan, Paul Durrant, suravee.suthikulpanit,
	xen-devel, Boris Ostrovsky, brian.woods, Roger Pau Monne

On 01.07.2019 17:13, Razvan Cojocaru wrote:
> On 7/1/19 5:45 PM, Alexandru Stefan ISAILA wrote:
>>
>>
>> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>>> This patch aims to have mem access vm events sent from the emulator.
>>>> This is useful when we want to emulate only a page walk without
>>>> checking the EPT, but still want to check the EPT when emulating
>>>> the instruction that caused the page walk. In this case, the original
>>>> EPT fault is raised by the walk trying to set the accessed or dirty
>>>> bits, but executing the instruction itself might also cause an EPT
>>>> fault if permitted to run, and this second fault should not be lost.
>>>
>>> I'm afraid I still can't translate this into what exactly is wanted and
>>> why. While typically we don't use examples to demonstrate what is
>>> wanted in commit messages, I think this is a rather good candidate
>>> for actually using such an approach. This may then ...
>>>
>>>> We use hvmemul_map_linear_addr() to intercept r/w access and
>>>> __hvm_copy() to intercept exec access.
>>>>
>>>> First we try to send a vm event and if the event is sent then emulation
>>>> returns X86EMUL_RETRY in order to stop emulation on instructions that
>>>> use access protected pages. If the event is not sent then the
>>>> emulation goes on as expected.
>>>
>>> ... also help understanding this part, which I continue to be confused
>>> by, too.
> 
> Simply put, the introspection agent wants to treat all A/D bit writes as benign. Receiving vm_events about them is a big pessimization, and we want to optimize.
> 
> We'd like to filter these events out.
> 
> To this end, we're currently (in the present incarnation of the Xen code) fully emulating the instruction at RIP when the hardware sees an EPT fault with npfec.kind != npfec_kind_with_gla. This is, however, incorrect, because the instruction at RIP might legitimately cause an EPT fault of its own (while accessing a _different_ page from the original one, where A/D were set).
> 
> We've tried to solve this by actually writing the A/D bits and returning from p2m_mem_access_check(), however that has led to a slightly philosophical discussion about the proper way to achieve our goals while taking into account speculative setting of these bits. The issues raised have not been satisfactorily answered in an authoritative manner to this day.
> 
> So the only option we seem to have left at this point is to perform the whole emulation, _while_ ignoring EPT restrictions for the walk part, and taking them into account for the "actual" emulating of the instruction @ RIP.
> 
> If we're sending out a vm_event, then we don't want the emulation to complete - since in that case we won't be able to veto whatever it is doing. That would mean that we can't actually prevent any malicious activity that happens here; instead we'd only be able to report on it.
> 
> So when we see a "send-vm_event" case while emulating, we need to do two things:
> 
> 1. send the event out;
> 2. _don't_ complete the emulation (return X86EMUL_RETRY).
> 
> When 2. happens, we'll hit hvm_vm_event_do_resume() again _after_ the introspection agent treats the event and resumes the guest. There, the instruction at RIP will be fully emulated (with the EPT ignored) if the introspection application allows it, and the guest will continue to run past the instruction.
> 
> That's our plan, anyway. Hopefully we're not going in a wrong direction about it (all help is very much appreciated).
> 
> I hope this has made things clearer.

I think so, yes, and I'm getting the impression that in reply to an
earlier version you've already once explained this. The thing is -
the explanation needs to come _with_ the patch, not _after_ it.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-01 14:45 ` Alexandru Stefan ISAILA
  2019-07-01 14:55   ` Jan Beulich
@ 2019-07-01 15:13   ` Razvan Cojocaru
  2019-07-01 15:32     ` Jan Beulich
  1 sibling, 1 reply; 13+ messages in thread
From: Razvan Cojocaru @ 2019-07-01 15:13 UTC (permalink / raw)
  To: Alexandru Stefan ISAILA, Jan Beulich
  Cc: kevin.tian, tamas, jun.nakajima, Wei Liu, George Dunlap,
	Andrew Cooper, Tim Deegan, Paul Durrant, suravee.suthikulpanit,
	xen-devel, Boris Ostrovsky, brian.woods, Roger Pau Monne

On 7/1/19 5:45 PM, Alexandru Stefan ISAILA wrote:
> 
> 
> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>> This patch aims to have mem access vm events sent from the emulator.
>>> This is useful where we want to only emulate a page walk without
>>> checking the EPT, but we still want to check the EPT when emulating
>>> the instruction that caused the page walk. In this case, the original
>>> EPT fault is caused by the walk trying to set the accessed or dirty
>>> bits, but executing the instruction itself might also cause an EPT
>>> fault if permitted to run, and this second fault should not be lost.
>>
>> I'm afraid I still can't translate this into what exactly is wanted and
>> why. While typically we don't use examples to demonstrate what is
>> wanted in commit messages, I think this is a rather good candidate
>> for actually using such an approach. This may then ...
>>
>>> We use hvmemul_map_linear_addr() to intercept r/w access and
>>> __hvm_copy() to intercept exec access.
>>>
>>> First we try to send a vm event and if the event is sent then emulation
>>> returns X86EMUL_RETRY in order to stop emulation on instructions that
>>> use access protected pages. If the event is not sent then the
>>> emulation goes on as expected.
>>
>> ... also help understanding this part, which I continue to be confused
>> by, too.

Simply put, the introspection agent wants to treat all A/D bit writes as 
benign. Receiving vm_events about them is a big pessimization, and we 
want to optimize.

We'd like to filter these events out.

To this end, we're currently (in the present incarnation of the Xen 
code) fully emulating the instruction at RIP when the hardware sees an 
EPT fault with npfec.kind != npfec_kind_with_gla. This is, however, 
incorrect, because the instruction at RIP might legitimately cause an 
EPT fault of its own (while accessing a _different_ page from the 
original one, where A/D were set).

We've tried to solve this by actually writing the A/D bits and returning 
from p2m_mem_access_check(), however that has led to a slightly 
philosophical discussion about the proper way to achieve our goals while 
taking into account speculative setting of these bits. The issues raised 
have not been satisfactorily answered in an authoritative manner to this 
day.

So the only option we seem to have left at this point is to perform the 
whole emulation, _while_ ignoring EPT restrictions for the walk part, 
and taking them into account for the "actual" emulating of the 
instruction @ RIP.

If we're sending out a vm_event, then we don't want the emulation to
complete - since in that case we won't be able to veto whatever it is
doing. That would mean that we can't actually prevent any malicious
activity that happens here; instead we'd only be able to report on it.

So when we see a "send-vm_event" case while emulating, we need to do two 
things:

1. send the event out;
2. _don't_ complete the emulation (return X86EMUL_RETRY).

When 2. happens, we'll hit hvm_vm_event_do_resume() again _after_ the 
introspection agent treats the event and resumes the guest. There, the 
instruction at RIP will be fully emulated (with the EPT ignored) if the 
introspection application allows it, and the guest will continue to run 
past the instruction.
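In code, the pattern we're after at each emulator access point is
roughly this (simplified sketch, names as in the v5 patch):

    if ( hvm_emulate_send_vm_event(addr, gfn, pfec,
                                   hvmemul_ctxt->send_event) )
        return X86EMUL_RETRY;   /* 1. event sent, 2. don't complete */

    /* No violation (or no listener): emulate the access as usual. */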

That's our plan, anyway. Hopefully we're not going in a wrong direction 
about it (all help is very much appreciated).

I hope this has made things clearer. We're not after anything out of the
ordinary or fitting a niche case - we just want to filter out vm_events 
caused by page walks in a manner that remains safe for the guest OS user.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-01 14:45 ` Alexandru Stefan ISAILA
@ 2019-07-01 14:55   ` Jan Beulich
  2019-07-01 15:36     ` Alexandru Stefan ISAILA
  2019-07-01 15:13   ` Razvan Cojocaru
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2019-07-01 14:55 UTC (permalink / raw)
  To: Alexandru Stefan ISAILA
  Cc: Wei Liu, kevin.tian, tamas, jun.nakajima, rcojocaru,
	George Dunlap, Andrew Cooper, Tim Deegan, PaulDurrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne

On 01.07.2019 16:45, Alexandru Stefan ISAILA wrote:
> On 01.07.2019 16:13, Jan Beulich wrote:
>>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>>> +    if ( !send_event || !pfec )
>>> +        return false;
>>
>> I think I've said before that the !pfec part needs an explanation (in
>> a comment). Without such an explanation I'm inclined to say it
>> should be deleted. If otoh this is simply meant to be a shortcut,
>> then you should really just check the two bits you actually care
>> about further down.
> 
> The pfec check is done because I encountered pfec == 0 in tests. It
> could save some work if pfec == 0 when calling the function. Is it a
> must to have this check removed? If so, it can be done. The
> send_event flag will be checked before calling the function, as you said.

It's not a requirement for it to be removed, _if_ there's a good
reason for it to be there. I don't, however, see how pfec=0 could
be a problem - afaict it would return false a few lines further
down in that case.

>>> +    if ( !req.u.mem_access.flags )
>>> +        return false; /* no violation */
>>
>> How is the "false" here (I think this is the one the description talks
>> about) matching up with the various other ones in the function?
> 
> There should be no event if there is no access violation. So in this
> case the emulation is continued as expected.

But this doesn't answer my question: You use "false" as return value
to indicate different things. Only the one here means "no access
violation".

>>> @@ -615,6 +669,13 @@ static void *hvmemul_map_linear_addr(
>>>    
>>>            if ( pfec & PFEC_write_access )
>>>            {
>>> +            if ( hvm_emulate_send_vm_event(addr, gfn, pfec,
>>> +                                           hvmemul_ctxt->send_event) )
>>> +            {
>>> +                err = ERR_PTR(~X86EMUL_RETRY);
>>> +                goto out;
>>> +            }
>>
>> How come this sits only on the write path?
> 
> We are only interested in write access on this path. This also
> ensures that pfec is set.

I'm sorry, but the event sending should not be tailored to what you
need or want. Or if so (i.e. if agreed upon among the VM event
maintainers) then this restriction should be clearly spelled out.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
  2019-07-01 13:13 Jan Beulich
@ 2019-07-01 14:45 ` Alexandru Stefan ISAILA
  2019-07-01 14:55   ` Jan Beulich
  2019-07-01 15:13   ` Razvan Cojocaru
  0 siblings, 2 replies; 13+ messages in thread
From: Alexandru Stefan ISAILA @ 2019-07-01 14:45 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, tamas, rcojocaru, Wei Liu, jun.nakajima,
	Andrew Cooper, Tim Deegan, George Dunlap, Paul Durrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne



On 01.07.2019 16:13, Jan Beulich wrote:
>>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
>> This patch aims to have mem access vm events sent from the emulator.
>> This is useful where we want to only emulate a page walk without
>> checking the EPT, but we still want to check the EPT when emulating
>> the instruction that caused the page walk. In this case, the original
>> EPT fault is caused by the walk trying to set the accessed or dirty
>> bits, but executing the instruction itself might also cause an EPT
>> fault if permitted to run, and this second fault should not be lost.
> 
> I'm afraid I still can't translate this into what exactly is wanted and
> why. While typically we don't use examples to demonstrate what is
> wanted in commit messages, I think this is a rather good candidate
> for actually using such an approach. This may then ...
> 
>> We use hvmemul_map_linear_addr() to intercept r/w access and
>> __hvm_copy() to intercept exec access.
>>
>> First we try to send a vm event and if the event is sent then emulation
>> returns X86EMUL_RETRY in order to stop emulation on instructions that
>> use access protected pages. If the event is not sent then the
>> emulation goes on as expected.
> 
> ... also help understanding this part, which I continue to be confused
> by, too.
> 
>> @@ -530,6 +532,57 @@ static int hvmemul_do_mmio_addr(paddr_t mmio_gpa,
>>       return hvmemul_do_io_addr(1, mmio_gpa, reps, size, dir, df, ram_gpa);
>>   }
>>   
>> +bool hvm_emulate_send_vm_event(unsigned long gla, gfn_t gfn,
>> +                               uint32_t pfec, bool send_event)
>> +{
>> +    xenmem_access_t access;
>> +    vm_event_request_t req = {};
>> +    paddr_t gpa = ((gfn_x(gfn) << PAGE_SHIFT) | (gla & ~PAGE_MASK));
> 
> gfn_to_gaddr()

I will use that.
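I.e. the gpa computation would become (sketch):

    paddr_t gpa = gfn_to_gaddr(gfn) | (gla & ~PAGE_MASK);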

> 
>> +    if ( !send_event || !pfec )
>> +        return false;
> 
> I think I've said before that the !pfec part needs an explanation (in
> a comment). Without such an explanation I'm inclined to say it
> should be deleted. If otoh this is simply meant to be a shortcut,
> then you should really just check the two bits you actually care
> about further down.

The pfec check is done because I encountered pfec == 0 in tests. It
could save some work if pfec == 0 when calling the function. Is it a
must to have this check removed? If so, it can be done. The send_event
flag will be checked before calling the function, as you said.

> 
> Similarly I think I've said before that I'm not happy for the common
> case to now be to call into here just to bail back out (when VM
> events are disabled on a guest). IOW I don't think you should call
> into this function in the first place when "send_event" is false.
> 
>> +    if ( p2m_get_mem_access(current->domain, gfn, &access,
>> +                            altp2m_vcpu_idx(current)) != 0 )
>> +        return false;
>> +
>> +    switch ( access ) {
> 
> Style.
> 
>> +    case XENMEM_access_x:
>> +    case XENMEM_access_rx:
>> +        if ( pfec & PFEC_write_access )
>> +            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
>> +        break;
>> +
>> +    case XENMEM_access_w:
>> +    case XENMEM_access_rw:
>> +        if ( pfec & PFEC_insn_fetch )
>> +            req.u.mem_access.flags = MEM_ACCESS_X;
>> +        break;
>> +
>> +    case XENMEM_access_r:
>> +    case XENMEM_access_n:
>> +        if ( pfec & PFEC_write_access )
>> +            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
>> +        if ( pfec & PFEC_insn_fetch )
>> +            req.u.mem_access.flags |= MEM_ACCESS_X;
>> +        break;
>> +
>> +    default:
>> +        return false;
> 
> I think it would be more future proof to not have a default case
> here: When a new access enumerator gets introduced, most
> compilers would tell the developer right away that this new
> enumerator value needs actively handling here.

I will drop the default case and handle all of the enum members explicitly.
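Sketch of what I have in mind (every current xenmem_access_t member
named explicitly; untested):

    switch ( access )
    {
    case XENMEM_access_x:
    case XENMEM_access_rx:
        if ( pfec & PFEC_write_access )
            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
        break;

    case XENMEM_access_w:
    case XENMEM_access_rw:
        if ( pfec & PFEC_insn_fetch )
            req.u.mem_access.flags = MEM_ACCESS_X;
        break;

    case XENMEM_access_r:
    case XENMEM_access_n:
        if ( pfec & PFEC_write_access )
            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
        if ( pfec & PFEC_insn_fetch )
            req.u.mem_access.flags |= MEM_ACCESS_X;
        break;

    case XENMEM_access_wx:
    case XENMEM_access_rwx:
    case XENMEM_access_rx2rw:
    case XENMEM_access_n2rwx:
    case XENMEM_access_default:
        return false;
    }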

> 
>> +    }
>> +
>> +    if ( !req.u.mem_access.flags )
>> +        return false; /* no violation */
> 
> How is the "false" here (I think this is the one the description talks
> about) matching up with the various other ones in the function?

There should be no event if there is no access violation. So in this 
case the emulation is continued as expected.

> 
>> @@ -615,6 +669,13 @@ static void *hvmemul_map_linear_addr(
>>   
>>           if ( pfec & PFEC_write_access )
>>           {
>> +            if ( hvm_emulate_send_vm_event(addr, gfn, pfec,
>> +                                           hvmemul_ctxt->send_event) )
>> +            {
>> +                err = ERR_PTR(~X86EMUL_RETRY);
>> +                goto out;
>> +            }
> 
> How come this sits only on the write path?

We are only interested in write access on this path. This also
ensures that pfec is set.

> 
>> @@ -1115,7 +1176,8 @@ static int linear_read(unsigned long addr, unsigned int bytes, void *p_data,
>>        * clean up any interim state.
>>        */
>>       if ( !hvmemul_find_mmio_cache(vio, addr, IOREQ_READ, false) )
>> -        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo);
>> +        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo,
>> +                                        hvmemul_ctxt->send_event);
> 
> I'm not very happy to see this new parameter/argument addition.
> Did you consider putting the flag of interest elsewhere (into a
> structure hanging off of current, or into pagefault_info_t)?
> 
> Furthermore, if the parameter is really unavoidable, then please
> separate the mechanics of introducing it from the actual change
> you're after.

I could move the flag to vcpu.arch.vm_event.send_event.
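I.e. something like this (hypothetical new member, to be confirmed
with the vm_event maintainers):

    /* asm-x86/vm_event.h, in struct arch_vm_event: */
    bool send_event;

    /* hvm_emulate_one_vm_event() would set it around the emulation: */
    curr->arch.vm_event->send_event = true;
    rc = hvm_emulate_one(&ctx);
    curr->arch.vm_event->send_event = false;

Then __hvm_copy() / hvmemul_map_linear_addr() can test
current->arch.vm_event && current->arch.vm_event->send_event instead
of growing a new parameter.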

> 
>> @@ -2629,7 +2692,7 @@ void hvm_emulate_init_per_insn(
>>                hvm_copy_from_guest_linear(hvmemul_ctxt->insn_buf, addr,
>>                                           sizeof(hvmemul_ctxt->insn_buf),
>>                                           pfec | PFEC_insn_fetch,
>> -                                        NULL) == HVMTRANS_okay) ?
>> +                                        NULL, false) == HVMTRANS_okay) ?
> 
> If you pass false here, what's the point of handling insn fetches
> in the new function you add?

This is a good question. Looking at the flow, I think I should call the
send_event path here as well. Assuming that having
hvmemul_ctxt->insn_buf_bytes = 0 when the event was sent is enough to
stop the emulation, I will give this a test.
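Something like this in hvm_emulate_init_per_insn() (untested sketch,
keeping the v5 send_event parameter):

    if ( hvm_copy_from_guest_linear(hvmemul_ctxt->insn_buf, addr,
                                    sizeof(hvmemul_ctxt->insn_buf),
                                    pfec | PFEC_insn_fetch, NULL,
                                    hvmemul_ctxt->send_event)
         == HVMTRANS_okay )
        hvmemul_ctxt->insn_buf_bytes = sizeof(hvmemul_ctxt->insn_buf);
    else
        /* Event sent (or fetch failed): nothing to emulate. */
        hvmemul_ctxt->insn_buf_bytes = 0;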

Thanks,
Alex
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

* Re: [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate
@ 2019-07-01 13:13 Jan Beulich
  2019-07-01 14:45 ` Alexandru Stefan ISAILA
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2019-07-01 13:13 UTC (permalink / raw)
  To: Alexandru Stefan ISAILA
  Cc: kevin.tian, tamas, rcojocaru, Wei Liu, jun.nakajima,
	Andrew Cooper, Tim Deegan, George Dunlap, Paul Durrant,
	suravee.suthikulpanit, xen-devel, Boris Ostrovsky, brian.woods,
	Roger Pau Monne

>>> On 04.06.19 at 13:49, <aisaila@bitdefender.com> wrote:
> This patch aims to have mem access vm events sent from the emulator.
> This is useful where we want to only emulate a page walk without
> checking the EPT, but we still want to check the EPT when emulating
> the instruction that caused the page walk. In this case, the original
> EPT fault is caused by the walk trying to set the accessed or dirty
> bits, but executing the instruction itself might also cause an EPT
> fault if permitted to run, and this second fault should not be lost.

I'm afraid I still can't translate this into what exactly is wanted and
why. While typically we don't use examples to demonstrate what is
wanted in commit messages, I think this is a rather good candidate
for actually using such an approach. This may then ...

> We use hvmemul_map_linear_addr() to intercept r/w access and
> __hvm_copy() to intercept exec access.
> 
> First we try to send a vm event and if the event is sent then emulation
> returns X86EMUL_RETRY in order to stop emulation on instructions that
> use access protected pages. If the event is not sent then the
> emulation goes on as expected.

... also help understanding this part, which I continue to be confused
by, too.

> @@ -530,6 +532,57 @@ static int hvmemul_do_mmio_addr(paddr_t mmio_gpa,
>      return hvmemul_do_io_addr(1, mmio_gpa, reps, size, dir, df, ram_gpa);
>  }
>  
> +bool hvm_emulate_send_vm_event(unsigned long gla, gfn_t gfn,
> +                               uint32_t pfec, bool send_event)
> +{
> +    xenmem_access_t access;
> +    vm_event_request_t req = {};
> +    paddr_t gpa = ((gfn_x(gfn) << PAGE_SHIFT) | (gla & ~PAGE_MASK));

gfn_to_gaddr()

> +    if ( !send_event || !pfec )
> +        return false;

I think I've said before that the !pfec part needs an explanation (in
a comment). Without such an explanation I'm inclined to say it
should be deleted. If otoh this is simply meant to be a shortcut,
then you should really just check the two bits you actually care
about further down.

Similarly I think I've said before that I'm not happy for the common
case to now be to call into here just to bail back out (when VM
events are disabled on a guest). IOW I don't think you should call
into this function in the first place when "send_event" is false.
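I.e. guard at the call site, so that the common case never enters the
function (sketch only - the send_event parameter could then be dropped
altogether):

    if ( hvmemul_ctxt->send_event &&
         hvm_emulate_send_vm_event(addr, gfn, pfec) )
    {
        err = ERR_PTR(~X86EMUL_RETRY);
        goto out;
    }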

> +    if ( p2m_get_mem_access(current->domain, gfn, &access,
> +                            altp2m_vcpu_idx(current)) != 0 )
> +        return false;
> +
> +    switch ( access ) {

Style.

> +    case XENMEM_access_x:
> +    case XENMEM_access_rx:
> +        if ( pfec & PFEC_write_access )
> +            req.u.mem_access.flags = MEM_ACCESS_R | MEM_ACCESS_W;
> +        break;
> +
> +    case XENMEM_access_w:
> +    case XENMEM_access_rw:
> +        if ( pfec & PFEC_insn_fetch )
> +            req.u.mem_access.flags = MEM_ACCESS_X;
> +        break;
> +
> +    case XENMEM_access_r:
> +    case XENMEM_access_n:
> +        if ( pfec & PFEC_write_access )
> +            req.u.mem_access.flags |= MEM_ACCESS_R | MEM_ACCESS_W;
> +        if ( pfec & PFEC_insn_fetch )
> +            req.u.mem_access.flags |= MEM_ACCESS_X;
> +        break;
> +
> +    default:
> +        return false;

I think it would be more future proof to not have a default case
here: When a new access enumerator gets introduced, most
compilers would tell the developer right away that this new
enumerator value needs actively handling here.

> +    }
> +
> +    if ( !req.u.mem_access.flags )
> +        return false; /* no violation */

How is the "false" here (I think this is the one the description talks
about) matching up with the various other ones in the function?

> @@ -615,6 +669,13 @@ static void *hvmemul_map_linear_addr(
>  
>          if ( pfec & PFEC_write_access )
>          {
> +            if ( hvm_emulate_send_vm_event(addr, gfn, pfec,
> +                                           hvmemul_ctxt->send_event) )
> +            {
> +                err = ERR_PTR(~X86EMUL_RETRY);
> +                goto out;
> +            }

How come this sits only on the write path?

> @@ -1115,7 +1176,8 @@ static int linear_read(unsigned long addr, unsigned int bytes, void *p_data,
>       * clean up any interim state.
>       */
>      if ( !hvmemul_find_mmio_cache(vio, addr, IOREQ_READ, false) )
> -        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo);
> +        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo,
> +                                        hvmemul_ctxt->send_event);

I'm not very happy to see this new parameter/argument addition.
Did you consider putting the flag of interest elsewhere (into a
structure hanging off of current, or into pagefault_info_t)?

Furthermore, if the parameter is really unavoidable, then please
separate the mechanics of introducing it from the actual change
you're after.

> @@ -2629,7 +2692,7 @@ void hvm_emulate_init_per_insn(
>               hvm_copy_from_guest_linear(hvmemul_ctxt->insn_buf, addr,
>                                          sizeof(hvmemul_ctxt->insn_buf),
>                                          pfec | PFEC_insn_fetch,
> -                                        NULL) == HVMTRANS_okay) ?
> +                                        NULL, false) == HVMTRANS_okay) ?

If you pass false here, what's the point of handling insn fetches
in the new function you add?

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Thread overview: 13+ messages
2019-06-04 11:49 [Xen-devel] [PATCH v5] x86/emulate: Send vm_event from emulate Alexandru Stefan ISAILA
2019-06-11 12:45 ` Paul Durrant
2019-06-12  9:27 ` Alexandru Stefan ISAILA
2019-07-01 13:13 Jan Beulich
2019-07-01 14:45 ` Alexandru Stefan ISAILA
2019-07-01 14:55   ` Jan Beulich
2019-07-01 15:36     ` Alexandru Stefan ISAILA
2019-07-01 15:53       ` Jan Beulich
2019-07-02  7:58         ` Alexandru Stefan ISAILA
2019-07-02  8:01           ` Jan Beulich
2019-07-02  8:10             ` Alexandru Stefan ISAILA
2019-07-01 15:13   ` Razvan Cojocaru
2019-07-01 15:32     ` Jan Beulich
