All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
@ 2022-04-13 13:41 Tamas K Lengyel
  2022-04-13 13:41 ` [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits Tamas K Lengyel
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-13 13:41 UTC (permalink / raw)
  To: xen-devel
  Cc: Tamas K Lengyel, Wei Liu, Anthony PERARD, Juergen Gross,
	Andrew Cooper, George Dunlap, Jan Beulich, Julien Grall,
	Stefano Stabellini, Roger Pau Monné,
	Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu

Allow specify distinct parts of the fork VM to be reset. This is useful when a
fuzzing operation involves mapping in only a handful of pages that are known
ahead of time. Throwing these pages away just to be re-copied immediately is
expensive, thus allowing to specify partial resets can speed things up.

Also allow resetting to be initiated from vm_event responses as an
optiomization.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
v4: No change
v3: Rebase on simpler approach after dropping empty_p2m feature
v2: address review comments and add more sanity checking
---
 tools/include/xenctrl.h                |  3 ++-
 tools/libs/ctrl/xc_memshr.c            |  7 ++++++-
 xen/arch/x86/include/asm/mem_sharing.h |  9 +++++++++
 xen/arch/x86/mm/mem_sharing.c          | 24 +++++++++++++++++++-----
 xen/common/vm_event.c                  | 15 +++++++++++++++
 xen/include/public/memory.h            |  4 +++-
 xen/include/public/vm_event.h          |  8 ++++++++
 7 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 95bd5eca67..1b089a2c02 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -2290,7 +2290,8 @@ int xc_memshr_fork(xc_interface *xch,
  *
  * With VMs that have a lot of memory this call may block for a long time.
  */
-int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain);
+int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain,
+                         bool reset_state, bool reset_memory);
 
 /* Debug calls: return the number of pages referencing the shared frame backing
  * the input argument. Should be one or greater.
diff --git a/tools/libs/ctrl/xc_memshr.c b/tools/libs/ctrl/xc_memshr.c
index a6cfd7dccf..a0d0b894e2 100644
--- a/tools/libs/ctrl/xc_memshr.c
+++ b/tools/libs/ctrl/xc_memshr.c
@@ -257,12 +257,17 @@ int xc_memshr_fork(xc_interface *xch, uint32_t pdomid, uint32_t domid,
     return xc_memshr_memop(xch, domid, &mso);
 }
 
-int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid)
+int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid, bool reset_state,
+                         bool reset_memory)
 {
     xen_mem_sharing_op_t mso;
 
     memset(&mso, 0, sizeof(mso));
     mso.op = XENMEM_sharing_op_fork_reset;
+    if ( reset_state )
+        mso.u.fork.flags |= XENMEM_FORK_RESET_STATE;
+    if ( reset_memory )
+        mso.u.fork.flags |= XENMEM_FORK_RESET_MEMORY;
 
     return xc_memshr_memop(xch, domid, &mso);
 }
diff --git a/xen/arch/x86/include/asm/mem_sharing.h b/xen/arch/x86/include/asm/mem_sharing.h
index cf7a12f4d2..2c00069bc9 100644
--- a/xen/arch/x86/include/asm/mem_sharing.h
+++ b/xen/arch/x86/include/asm/mem_sharing.h
@@ -85,6 +85,9 @@ static inline bool mem_sharing_is_fork(const struct domain *d)
 int mem_sharing_fork_page(struct domain *d, gfn_t gfn,
                           bool unsharing);
 
+int mem_sharing_fork_reset(struct domain *d, bool reset_state,
+                           bool reset_memory);
+
 /*
  * If called by a foreign domain, possible errors are
  *   -EBUSY -> ring full
@@ -148,6 +151,12 @@ static inline int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool lock)
     return -EOPNOTSUPP;
 }
 
+static inline int mem_sharing_fork_reset(struct domain *d, bool reset_state,
+                                         bool reset_memory)
+{
+    return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* __MEM_SHARING_H__ */
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 15e6a7ed81..2f447d94ab 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1879,15 +1879,21 @@ static int fork(struct domain *cd, struct domain *d)
  * footprints the hypercall continuation should be implemented (or if this
  * feature needs to be become "stable").
  */
-static int mem_sharing_fork_reset(struct domain *d)
+int mem_sharing_fork_reset(struct domain *d, bool reset_state,
+                           bool reset_memory)
 {
-    int rc;
+    int rc = 0;
     struct domain *pd = d->parent;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     struct page_info *page, *tmp;
 
+    ASSERT(reset_state || reset_memory);
+
     domain_pause(d);
 
+    if ( !reset_memory )
+        goto state;
+
     /* need recursive lock because we will free pages */
     spin_lock_recursive(&d->page_alloc_lock);
     page_list_for_each_safe(page, tmp, &d->page_list)
@@ -1920,7 +1926,9 @@ static int mem_sharing_fork_reset(struct domain *d)
     }
     spin_unlock_recursive(&d->page_alloc_lock);
 
-    rc = copy_settings(d, pd);
+ state:
+    if ( reset_state )
+        rc = copy_settings(d, pd);
 
     domain_unpause(d);
 
@@ -2227,15 +2235,21 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
 
     case XENMEM_sharing_op_fork_reset:
     {
+        bool reset_state = mso.u.fork.flags & XENMEM_FORK_RESET_STATE;
+        bool reset_memory = mso.u.fork.flags & XENMEM_FORK_RESET_MEMORY;
+
         rc = -EINVAL;
-        if ( mso.u.fork.pad || mso.u.fork.flags )
+        if ( mso.u.fork.pad || (!reset_state && !reset_memory) )
+            goto out;
+        if ( mso.u.fork.flags &
+             ~(XENMEM_FORK_RESET_STATE | XENMEM_FORK_RESET_MEMORY) )
             goto out;
 
         rc = -ENOSYS;
         if ( !d->parent )
             goto out;
 
-        rc = mem_sharing_fork_reset(d);
+        rc = mem_sharing_fork_reset(d, reset_state, reset_memory);
         break;
     }
 
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 84cf52636b..d26a6699fc 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -28,6 +28,11 @@
 #include <asm/p2m.h>
 #include <asm/monitor.h>
 #include <asm/vm_event.h>
+
+#ifdef CONFIG_MEM_SHARING
+#include <asm/mem_sharing.h>
+#endif
+
 #include <xsm/xsm.h>
 #include <public/hvm/params.h>
 
@@ -394,6 +399,16 @@ static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
             if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
                 p2m_mem_paging_resume(d, &rsp);
 #endif
+#ifdef CONFIG_MEM_SHARING
+            if ( mem_sharing_is_fork(d) )
+            {
+                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
+                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
+
+                if ( reset_state || reset_mem )
+                    ASSERT(!mem_sharing_fork_reset(d, reset_state, reset_mem));
+            }
+#endif
 
             /*
              * Check emulation flags in the arch-specific handler only, as it
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index a1a0f0233a..f8d26fb77d 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
                 uint32_t gref;     /* IN: gref to debug         */
             } u;
         } debug;
-        struct mem_sharing_op_fork {      /* OP_FORK */
+        struct mem_sharing_op_fork {      /* OP_FORK{,_RESET} */
             domid_t parent_domain;        /* IN: parent's domain id */
 /* Only makes sense for short-lived forks */
 #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
 /* Only makes sense for short-lived forks */
 #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
+#define XENMEM_FORK_RESET_STATE        (1u << 2)
+#define XENMEM_FORK_RESET_MEMORY       (1u << 3)
             uint16_t flags;               /* IN: optional settings */
             uint32_t pad;                 /* Must be set to 0 */
         } fork;
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index bb003d21d0..81c2ee28cc 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -127,6 +127,14 @@
  * Reset the vmtrace buffer (if vmtrace is enabled)
  */
 #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
+/*
+ * Reset the VM state (if VM is fork)
+ */
+#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
+/*
+ * Remove unshared entried from physmap (if VM is fork)
+ */
+#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
 
 /*
  * Reasons for the vm event request
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits
  2022-04-13 13:41 [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
@ 2022-04-13 13:41 ` Tamas K Lengyel
  2022-04-22 14:08   ` Tamas K Lengyel
  2022-04-25 14:41   ` Roger Pau Monné
  2022-04-22 14:07 ` [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
  2022-04-25 14:11 ` Roger Pau Monné
  2 siblings, 2 replies; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-13 13:41 UTC (permalink / raw)
  To: xen-devel
  Cc: Tamas K Lengyel, Wei Liu, Anthony PERARD, Juergen Gross,
	Andrew Cooper, George Dunlap, Jan Beulich, Julien Grall,
	Stefano Stabellini, Tamas K Lengyel, Alexandru Isaila,
	Petre Pircalabu, Roger Pau Monné,
	Jun Nakajima, Kevin Tian

Add monitor event that hooks the vmexit handler allowing for both sync and
async monitoring of events. With async monitoring an event is placed on the
monitor ring for each exit and the rest of the vmexit handler resumes normally.
If there are additional monitor events configured those will also place their
respective events on the monitor ring.

With the sync version an event is placed on the monitor ring but the handler
does not get resumed, thus the sync version is only useful when the VM is not
expected to resume normally after the vmexit. Our use-case is primarily with
the sync version with VM forks where the fork gets reset after sync vmexit
event, thus the rest of the vmexit handler can be safely skipped. This is
very useful when we want to avoid Xen crashing the VM under any circumstance,
for example during fuzzing. Collecting all vmexit information regardless of
the root cause makes it easier to reason about the state of the VM on the
monitor side, hence we opt to receive all events, even for external interrupt
and NMI exits and let the monitor agent decide how to proceed.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
v4: Minor tweaks and more verbose patch description.

Note: making the sync version resume-friendly is specifically out-of-scope as
it would require significant rearrangement of the vmexit handler. As this
feature is not required for our use-case we opt for the version that minimizes
impact on the existing code.
---
 tools/include/xenctrl.h                |  2 ++
 tools/libs/ctrl/xc_monitor.c           | 15 +++++++++++++++
 xen/arch/x86/hvm/monitor.c             | 18 ++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c             | 12 ++++++++++++
 xen/arch/x86/include/asm/domain.h      |  2 ++
 xen/arch/x86/include/asm/hvm/monitor.h |  2 ++
 xen/arch/x86/include/asm/monitor.h     |  3 ++-
 xen/arch/x86/monitor.c                 | 14 ++++++++++++++
 xen/include/public/domctl.h            |  6 ++++++
 xen/include/public/vm_event.h          |  8 ++++++++
 10 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 1b089a2c02..159eaac050 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -2096,6 +2096,8 @@ int xc_monitor_privileged_call(xc_interface *xch, uint32_t domain_id,
                                bool enable);
 int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
                                   bool enable);
+int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
+                      bool sync);
 /**
  * This function enables / disables emulation for each REP for a
  * REP-compatible instruction.
diff --git a/tools/libs/ctrl/xc_monitor.c b/tools/libs/ctrl/xc_monitor.c
index 4ac823e775..c5fa62ff30 100644
--- a/tools/libs/ctrl/xc_monitor.c
+++ b/tools/libs/ctrl/xc_monitor.c
@@ -246,6 +246,21 @@ int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
     return do_domctl(xch, &domctl);
 }
 
+int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
+                      bool sync)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_monitor_op;
+    domctl.domain = domain_id;
+    domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
+                                    : XEN_DOMCTL_MONITOR_OP_DISABLE;
+    domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_VMEXIT;
+    domctl.u.monitor_op.u.vmexit.sync = sync;
+
+    return do_domctl(xch, &domctl);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index b44a1e1dfe..64a38e8fa7 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -328,6 +328,24 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
     return monitor_traps(curr, true, &req) >= 0;
 }
 
+int hvm_monitor_vmexit(unsigned long exit_reason,
+                       unsigned long exit_qualification)
+{
+    struct vcpu *curr = current;
+    struct arch_domain *ad = &curr->domain->arch;
+    vm_event_request_t req = {};
+
+    ASSERT(ad->monitor.vmexit_enabled);
+
+    req.reason = VM_EVENT_REASON_VMEXIT;
+    req.u.vmexit.reason = exit_reason;
+    req.u.vmexit.qualification = exit_qualification;
+
+    set_npt_base(curr, &req);
+
+    return monitor_traps(curr, ad->monitor.vmexit_sync, &req);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c075370f64..2794db46f9 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -4008,6 +4008,18 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
         }
     }
 
+    if ( unlikely(currd->arch.monitor.vmexit_enabled) )
+    {
+        int rc;
+
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+        rc = hvm_monitor_vmexit(exit_reason, exit_qualification);
+        if ( rc < 0 )
+            goto exit_and_crash;
+        if ( rc )
+            return;
+    }
+
     /* XXX: This looks ugly, but we need a mechanism to ensure
      * any pending vmresume has really happened
      */
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index e62e109598..855db352c0 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -430,6 +430,8 @@ struct arch_domain
          */
         unsigned int inguest_pagefault_disabled                            : 1;
         unsigned int control_register_values                               : 1;
+        unsigned int vmexit_enabled                                        : 1;
+        unsigned int vmexit_sync                                           : 1;
         struct monitor_msr_bitmap *msr_bitmap;
         uint64_t write_ctrlreg_mask[4];
     } monitor;
diff --git a/xen/arch/x86/include/asm/hvm/monitor.h b/xen/arch/x86/include/asm/hvm/monitor.h
index a75cd8545c..639f6dfa37 100644
--- a/xen/arch/x86/include/asm/hvm/monitor.h
+++ b/xen/arch/x86/include/asm/hvm/monitor.h
@@ -51,6 +51,8 @@ bool hvm_monitor_emul_unimplemented(void);
 
 bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
                            uint16_t kind);
+int hvm_monitor_vmexit(unsigned long exit_reason,
+                       unsigned long exit_qualification);
 
 #endif /* __ASM_X86_HVM_MONITOR_H__ */
 
diff --git a/xen/arch/x86/include/asm/monitor.h b/xen/arch/x86/include/asm/monitor.h
index 01c6d63bb9..d8d54c5f23 100644
--- a/xen/arch/x86/include/asm/monitor.h
+++ b/xen/arch/x86/include/asm/monitor.h
@@ -89,7 +89,8 @@ static inline uint32_t arch_monitor_get_capabilities(struct domain *d)
                     (1U << XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION) |
                     (1U << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
                     (1U << XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED) |
-                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT));
+                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT) |
+                    (1U << XEN_DOMCTL_MONITOR_EVENT_VMEXIT));
 
     if ( hvm_is_singlestep_supported() )
         capabilities |= (1U << XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP);
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index 3079726a8b..30ca71432c 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -332,6 +332,20 @@ int arch_monitor_domctl_event(struct domain *d,
         break;
     }
 
+    case XEN_DOMCTL_MONITOR_EVENT_VMEXIT:
+    {
+        bool old_status = ad->monitor.vmexit_enabled;
+
+        if ( unlikely(old_status == requested_status) )
+            return -EEXIST;
+
+        domain_pause(d);
+        ad->monitor.vmexit_enabled = requested_status;
+        ad->monitor.vmexit_sync = mop->u.vmexit.sync;
+        domain_unpause(d);
+        break;
+    }
+
     default:
         /*
          * Should not be reached unless arch_monitor_get_capabilities() is
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index b85e6170b0..4803ed7afc 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1057,6 +1057,7 @@ struct xen_domctl_psr_cmt_op {
 #define XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED    10
 /* Enabled by default */
 #define XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT     11
+#define XEN_DOMCTL_MONITOR_EVENT_VMEXIT                12
 
 struct xen_domctl_monitor_op {
     uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
@@ -1107,6 +1108,11 @@ struct xen_domctl_monitor_op {
             /* Pause vCPU until response */
             uint8_t sync;
         } debug_exception;
+
+        struct {
+            /* Send event and don't process vmexit */
+            uint8_t sync;
+        } vmexit;
     } u;
 };
 
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index 81c2ee28cc..07f106f811 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -175,6 +175,8 @@
 #define VM_EVENT_REASON_DESCRIPTOR_ACCESS       13
 /* Current instruction is not implemented by the emulator */
 #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED      14
+/* VMEXIT */
+#define VM_EVENT_REASON_VMEXIT                  15
 
 /* Supported values for the vm_event_write_ctrlreg index. */
 #define VM_EVENT_X86_CR0    0
@@ -394,6 +396,11 @@ struct vm_event_emul_insn_data {
     uint8_t data[16]; /* Has to be completely filled */
 };
 
+struct vm_event_vmexit {
+    uint64_t reason;
+    uint64_t qualification;
+};
+
 typedef struct vm_event_st {
     uint32_t version;   /* VM_EVENT_INTERFACE_VERSION */
     uint32_t flags;     /* VM_EVENT_FLAG_* */
@@ -414,6 +421,7 @@ typedef struct vm_event_st {
         struct vm_event_debug                 software_breakpoint;
         struct vm_event_debug                 debug_exception;
         struct vm_event_cpuid                 cpuid;
+        struct vm_event_vmexit                vmexit;
         union {
             struct vm_event_interrupt_x86     x86;
         } interrupt;
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-13 13:41 [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
  2022-04-13 13:41 ` [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits Tamas K Lengyel
@ 2022-04-22 14:07 ` Tamas K Lengyel
  2022-04-25  7:49   ` Jan Beulich
  2022-04-25 14:11 ` Roger Pau Monné
  2 siblings, 1 reply; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-22 14:07 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: xen-devel, Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	George Dunlap, Jan Beulich, Julien Grall, Stefano Stabellini,
	Roger Pau Monné,
	Alexandru Isaila, Petre Pircalabu

On Wed, Apr 13, 2022 at 9:43 AM Tamas K Lengyel <tamas.lengyel@intel.com> wrote:
>
> Allow specify distinct parts of the fork VM to be reset. This is useful when a
> fuzzing operation involves mapping in only a handful of pages that are known
> ahead of time. Throwing these pages away just to be re-copied immediately is
> expensive, thus allowing to specify partial resets can speed things up.
>
> Also allow resetting to be initiated from vm_event responses as an
> optimization.
>
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>

Patch ping. Could I get a Reviewed-by if there are no objections?

Thanks,
Tamas


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits
  2022-04-13 13:41 ` [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits Tamas K Lengyel
@ 2022-04-22 14:08   ` Tamas K Lengyel
  2022-04-25 14:41   ` Roger Pau Monné
  1 sibling, 0 replies; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-22 14:08 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: xen-devel, Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	George Dunlap, Jan Beulich, Julien Grall, Stefano Stabellini,
	Alexandru Isaila, Petre Pircalabu, Roger Pau Monné,
	Jun Nakajima, Kevin Tian

On Wed, Apr 13, 2022 at 9:43 AM Tamas K Lengyel <tamas.lengyel@intel.com> wrote:
>
> Add monitor event that hooks the vmexit handler allowing for both sync and
> async monitoring of events. With async monitoring an event is placed on the
> monitor ring for each exit and the rest of the vmexit handler resumes normally.
> If there are additional monitor events configured those will also place their
> respective events on the monitor ring.
>
> With the sync version an event is placed on the monitor ring but the handler
> does not get resumed, thus the sync version is only useful when the VM is not
> expected to resume normally after the vmexit. Our use-case is primarily with
> the sync version with VM forks where the fork gets reset after sync vmexit
> event, thus the rest of the vmexit handler can be safely skipped. This is
> very useful when we want to avoid Xen crashing the VM under any circumstance,
> for example during fuzzing. Collecting all vmexit information regardless of
> the root cause makes it easier to reason about the state of the VM on the
> monitor side, hence we opt to receive all events, even for external interrupt
> and NMI exits and let the monitor agent decide how to proceed.
>
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> ---
> v4: Minor tweaks and more verbose patch description.
>
> Note: making the sync version resume-friendly is specifically out-of-scope as
> it would require significant rearrangement of the vmexit handler. As this
> feature is not required for our use-case we opt for the version that minimizes
> impact on the existing code.

Patch ping.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-22 14:07 ` [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
@ 2022-04-25  7:49   ` Jan Beulich
  2022-04-25 11:29     ` Tamas K Lengyel
  0 siblings, 1 reply; 19+ messages in thread
From: Jan Beulich @ 2022-04-25  7:49 UTC (permalink / raw)
  To: Tamas K Lengyel, Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu
  Cc: xen-devel, Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	George Dunlap, Julien Grall, Stefano Stabellini,
	Roger Pau Monné

On 22.04.2022 16:07, Tamas K Lengyel wrote:
> On Wed, Apr 13, 2022 at 9:43 AM Tamas K Lengyel <tamas.lengyel@intel.com> wrote:
>>
>> Allow specify distinct parts of the fork VM to be reset. This is useful when a
>> fuzzing operation involves mapping in only a handful of pages that are known
>> ahead of time. Throwing these pages away just to be re-copied immediately is
>> expensive, thus allowing to specify partial resets can speed things up.
>>
>> Also allow resetting to be initiated from vm_event responses as an
>> optimization.
>>
>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> 
> Patch ping. Could I get a Reviewed-by if there are no objections?

Hmm, this is a little difficult. I'd be willing to give an ack, but that's
meaningless for most of the code here. Besides a stylistic issue I did
point out which I'm not happy with, I'm afraid I'm not good enough at
mem-sharing and forking. Therefore I wouldn't want to offer an R-b.
Considering the VM event interaction, maybe the BitDefender guys could
take a stab?

Of course you'd then still need a tool stack side ack.

Jan



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-25  7:49   ` Jan Beulich
@ 2022-04-25 11:29     ` Tamas K Lengyel
  2022-04-25 11:41       ` Jan Beulich
  2022-04-25 12:52       ` George Dunlap
  0 siblings, 2 replies; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-25 11:29 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu, Xen-devel,
	Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	George Dunlap, Julien Grall, Stefano Stabellini,
	Roger Pau Monné

[-- Attachment #1: Type: text/plain, Size: 2052 bytes --]

On Mon, Apr 25, 2022, 3:49 AM Jan Beulich <jbeulich@suse.com> wrote:

> On 22.04.2022 16:07, Tamas K Lengyel wrote:
> > On Wed, Apr 13, 2022 at 9:43 AM Tamas K Lengyel <tamas.lengyel@intel.com>
> wrote:
> >>
> >> Allow specify distinct parts of the fork VM to be reset. This is useful
> when a
> >> fuzzing operation involves mapping in only a handful of pages that are
> known
> >> ahead of time. Throwing these pages away just to be re-copied
> immediately is
> >> expensive, thus allowing to specify partial resets can speed things up.
> >>
> >> Also allow resetting to be initiated from vm_event responses as an
> >> optimization.
> >>
> >> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> >
> > Patch ping. Could I get a Reviewed-by if there are no objections?
>
> Hmm, this is a little difficult. I'd be willing to give an ack, but that's
> meaningless for most of the code here. Besides a stylistic issue I did
> point out which I'm not happy with, I'm afraid I'm not good enough at
> mem-sharing and forking. Therefore I wouldn't want to offer an R-b.
> Considering the VM event interaction, maybe the BitDefender guys could
> take a stab?
>
> Of course you'd then still need a tool stack side ack.
>

So my take is that noone cares about mem_sharing, which is fine, its an
obscure experiment subsystem. But the only path I see as maintainer to get
anything in-tree is if I hand the task of writing the patch to a coworker
who then sends it in so that I can ack it. This is clearly disfunctional
and is to the detriment of the project overall. We need to get some rules
in place to avoid situations like this that clearly lead to no development
and no improvement and a huge incentive to forgot about upstreaming. With
no substantive objections but no acks a maintainer should be able to get
changes in-tree. That's part of what I would consider maintaining a
codebase to be!

Anyway, to be realistic I don't expect that option to materialize so I'm
very close to just stop all contributions to the project. It's dishartening.

Tamas

[-- Attachment #2: Type: text/html, Size: 3029 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-25 11:29     ` Tamas K Lengyel
@ 2022-04-25 11:41       ` Jan Beulich
  2022-04-25 12:52       ` George Dunlap
  1 sibling, 0 replies; 19+ messages in thread
From: Jan Beulich @ 2022-04-25 11:41 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu, Xen-devel,
	Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	Julien Grall, Stefano Stabellini, Roger Pau Monné,
	George Dunlap

On 25.04.2022 13:29, Tamas K Lengyel wrote:
> On Mon, Apr 25, 2022, 3:49 AM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> On 22.04.2022 16:07, Tamas K Lengyel wrote:
>>> On Wed, Apr 13, 2022 at 9:43 AM Tamas K Lengyel <tamas.lengyel@intel.com>
>> wrote:
>>>>
>>>> Allow specify distinct parts of the fork VM to be reset. This is useful
>> when a
>>>> fuzzing operation involves mapping in only a handful of pages that are
>> known
>>>> ahead of time. Throwing these pages away just to be re-copied
>> immediately is
>>>> expensive, thus allowing to specify partial resets can speed things up.
>>>>
>>>> Also allow resetting to be initiated from vm_event responses as an
>>>> optimization.
>>>>
>>>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>>>
>>> Patch ping. Could I get a Reviewed-by if there are no objections?
>>
>> Hmm, this is a little difficult. I'd be willing to give an ack, but that's
>> meaningless for most of the code here. Besides a stylistic issue I did
>> point out which I'm not happy with, I'm afraid I'm not good enough at
>> mem-sharing and forking. Therefore I wouldn't want to offer an R-b.
>> Considering the VM event interaction, maybe the BitDefender guys could
>> take a stab?
>>
>> Of course you'd then still need a tool stack side ack.
>>
> 
> So my take is that noone cares about mem_sharing, which is fine, its an
> obscure experiment subsystem. But the only path I see as maintainer to get
> anything in-tree is if I hand the task of writing the patch to a coworker
> who then sends it in so that I can ack it. This is clearly disfunctional
> and is to the detriment of the project overall. We need to get some rules
> in place to avoid situations like this that clearly lead to no development
> and no improvement and a huge incentive to forgot about upstreaming. With
> no substantive objections but no acks a maintainer should be able to get
> changes in-tree. That's part of what I would consider maintaining a
> codebase to be!

I certainly understand your frustration, the more that I'm similarly
affected with a much larger pile of patches. The check-in policy (see
./MAINTAINERS) is - I'm tempted to say "unfortunately" - quite clear
about there being a need for a 2nd party to be involved. In this case
though I've pointed out a possible route to unblock these two patches
- let's give Petre and Alexandru at least a few days to possibly
react to the ping. Apart from this I can only suggest to put this on
the agenda of the next Community Call; I'm afraid I won't myself, as
I've had this topic there already way too often.

Jan



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-25 11:29     ` Tamas K Lengyel
  2022-04-25 11:41       ` Jan Beulich
@ 2022-04-25 12:52       ` George Dunlap
  2022-04-25 13:26         ` Tamas K Lengyel
  1 sibling, 1 reply; 19+ messages in thread
From: George Dunlap @ 2022-04-25 12:52 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: Jan Beulich, Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu,
	Xen-devel, Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	Julien Grall, Stefano Stabellini, Roger Pau Monné,
	George Dunlap

[-- Attachment #1: Type: text/plain, Size: 3886 bytes --]

On Mon, Apr 25, 2022 at 12:29 PM Tamas K Lengyel <tamas@tklengyel.com>
wrote:

>
>
> On Mon, Apr 25, 2022, 3:49 AM Jan Beulich <jbeulich@suse.com> wrote:
>
>> On 22.04.2022 16:07, Tamas K Lengyel wrote:
>> > On Wed, Apr 13, 2022 at 9:43 AM Tamas K Lengyel <
>> tamas.lengyel@intel.com> wrote:
>> >>
>> >> Allow specify distinct parts of the fork VM to be reset. This is
>> useful when a
>> >> fuzzing operation involves mapping in only a handful of pages that are
>> known
>> >> ahead of time. Throwing these pages away just to be re-copied
>> immediately is
>> >> expensive, thus allowing to specify partial resets can speed things up.
>> >>
>> >> Also allow resetting to be initiated from vm_event responses as an
>> >> optimization.
>> >>
>> >> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>> >
>> > Patch ping. Could I get a Reviewed-by if there are no objections?
>>
>> Hmm, this is a little difficult. I'd be willing to give an ack, but that's
>> meaningless for most of the code here. Besides a stylistic issue I did
>> point out which I'm not happy with, I'm afraid I'm not good enough at
>> mem-sharing and forking. Therefore I wouldn't want to offer an R-b.
>> Considering the VM event interaction, maybe the BitDefender guys could
>> take a stab?
>>
>> Of course you'd then still need a tool stack side ack.
>>
>
> So my take is that noone cares about mem_sharing, which is fine, its an
> obscure experiment subsystem.
>

My take is slightly different; it's more that the project is large enough
that it's difficult to se where the needs are.  If Roger or Andy or I or
Wei or anyone see a thread with you & Jan going back and forth, it's
natural for us to assume that you & Jan have it in hand, and there's no
need for us to read through the thread.  Jan dislikes asking specific
people for a review, but many of the rest of us have sort of gotten in the
habit of doing so, as a way to solve the "visibility" issue.  The only
other way I can think of to solve the problem is to have a robot try to
assign tasks to people -- a method that has received skepticism, and would
also require a non-negligible amount of tooling to be written.


> But the only path I see as maintainer to get anything in-tree is if I hand
> the task of writing the patch to a coworker who then sends it in so that I
> can ack it. This is clearly disfunctional and is to the detriment of the
> project overall. We need to get some rules in place to avoid situations
> like this that clearly lead to no development and no improvement and a huge
> incentive to forgot about upstreaming. With no substantive objections but
> no acks a maintainer should be able to get changes in-tree. That's part of
> what I would consider maintaining a codebase to be!
>

Another possibility would be to ask your colleague actually do a
Reviewed-by.  The first time or two they might not be "of suitable stature
in the community", but I don't think it should take long to establish such
a stature, if they were doing the review in earnest.

I do agree that it seems like in this situation, the bar seems too high for
you to get your own code checked in.  I'd be open to the argument that we
should change the text of the check-in policy in MAINTAINERS to allow
maintainer modifications with only an Acked-by.


> Anyway, to be realistic I don't expect that option to materialize so I'm
> very close to just stop all contributions to the project. It's dishartening.
>


I can understand why you'd be disheartened if you thought you just couldn't
get any code checked in even as maintainer.  However, there are lots of
escalation paths open to you: you could email the community manager (me);
you could make a wider appeal on IRC for reviewers; you could raise the
general issue at the community call; you could send a patch proposing
changes to the check-in procedure described in MAINTAINERS.

 -George

>

[-- Attachment #2: Type: text/html, Size: 5921 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-25 12:52       ` George Dunlap
@ 2022-04-25 13:26         ` Tamas K Lengyel
  0 siblings, 0 replies; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-25 13:26 UTC (permalink / raw)
  To: George Dunlap
  Cc: Jan Beulich, Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu,
	Xen-devel, Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	Julien Grall, Stefano Stabellini, Roger Pau Monné,
	George Dunlap

On Mon, Apr 25, 2022 at 8:53 AM George Dunlap <dunlapg@umich.edu> wrote:
>
>
>
> On Mon, Apr 25, 2022 at 12:29 PM Tamas K Lengyel <tamas@tklengyel.com> wrote:
>>
>>
>>
>> On Mon, Apr 25, 2022, 3:49 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>
>>> On 22.04.2022 16:07, Tamas K Lengyel wrote:
>>> > On Wed, Apr 13, 2022 at 9:43 AM Tamas K Lengyel <tamas.lengyel@intel.com> wrote:
>>> >>
>>> >> Allow specify distinct parts of the fork VM to be reset. This is useful when a
>>> >> fuzzing operation involves mapping in only a handful of pages that are known
>>> >> ahead of time. Throwing these pages away just to be re-copied immediately is
>>> >> expensive, thus allowing to specify partial resets can speed things up.
>>> >>
>>> >> Also allow resetting to be initiated from vm_event responses as an
>>> >> optimization.
>>> >>
>>> >> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>>> >
>>> > Patch ping. Could I get a Reviewed-by if there are no objections?
>>>
>>> Hmm, this is a little difficult. I'd be willing to give an ack, but that's
>>> meaningless for most of the code here. Besides a stylistic issue I did
>>> point out which I'm not happy with, I'm afraid I'm not good enough at
>>> mem-sharing and forking. Therefore I wouldn't want to offer an R-b.
>>> Considering the VM event interaction, maybe the BitDefender guys could
>>> take a stab?
>>>
>>> Of course you'd then still need a tool stack side ack.
>>
>>
>> So my take is that noone cares about mem_sharing, which is fine, its an obscure experiment subsystem.
>
>
> My take is slightly different; it's more that the project is large enough that it's difficult to se where the needs are.  If Roger or Andy or I or Wei or anyone see a thread with you & Jan going back and forth, it's natural for us to assume that you & Jan have it in hand, and there's no need for us to read through the thread.  Jan dislikes asking specific people for a review, but many of the rest of us have sort of gotten in the habit of doing so, as a way to solve the "visibility" issue.  The only other way I can think of to solve the problem is to have a robot try to assign tasks to people -- a method that has received skepticism, and would also require a non-negligible amount of tooling to be written.

What's the point of the MAINTAINER's file and being automatically CC-d
if you then still separately have to ping the same people by name?
It's fine not to give an a-b/r-b if you still see discussion on some
parts of the patch, but like on this one, where the tools changes are
trivial - why would you wait? To be frank I long consider the
tools-side part of Xen unmaintained with only the most trivial stuff
ever having a chance to make it in. VM forking has effectively 0
toolstack side support in-tree because I never got any feedback from
tools maintainers after sending the patches in for months. I would
consider the toolstack side stuff in this patch for example trivial,
but again, no tools maintainers ever look at it so I was actually
considering dropping it from the patch completely since I really only
need the vm_event interface. Again, that would be dropping an
otherwise potentially useful interface purely due to the dysfunction
of the project's maintenance.

>
>>
>> But the only path I see as maintainer to get anything in-tree is if I hand the task of writing the patch to a coworker who then sends it in so that I can ack it. This is clearly disfunctional and is to the detriment of the project overall. We need to get some rules in place to avoid situations like this that clearly lead to no development and no improvement and a huge incentive to forgot about upstreaming. With no substantive objections but no acks a maintainer should be able to get changes in-tree. That's part of what I would consider maintaining a codebase to be!
>
>
> Another possibility would be to ask your colleague actually do a Reviewed-by.  The first time or two they might not be "of suitable stature in the community", but I don't think it should take long to establish such a stature, if they were doing the review in earnest.

Sure, but clearly still more effort then it should be just to work
around the system.

>
> I do agree that it seems like in this situation, the bar seems too high for you to get your own code checked in.  I'd be open to the argument that we should change the text of the check-in policy in MAINTAINERS to allow maintainer modifications with only an Acked-by.

Happy to hear! I think such a change would reduce the overhead on
reviewing patches like this that clearly have no effect on anything
else.

>
>>
>> Anyway, to be realistic I don't expect that option to materialize so I'm very close to just stop all contributions to the project. It's dishartening.
>
>
>
> I can understand why you'd be disheartened if you thought you just couldn't get any code checked in even as maintainer.  However, there are lots of escalation paths open to you: you could email the community manager (me); you could make a wider appeal on IRC for reviewers; you could raise the general issue at the community call; you could send a patch proposing changes to the check-in procedure described in MAINTAINERS.

Fair point. But when your main job is not working on Xen and you have
a couple weeks in-between other stuff to try to get some improvements
in, it's not really viable to have to go reform the whole project.
That's just the reality. If we can reduce the bar on getting code
upstream in situations like this then I would be happy to continue
working on the project but otherwise I don't see how this is worth
anyone's time.

Tamas


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-13 13:41 [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
  2022-04-13 13:41 ` [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits Tamas K Lengyel
  2022-04-22 14:07 ` [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
@ 2022-04-25 14:11 ` Roger Pau Monné
  2022-04-25 15:24   ` Tamas K Lengyel
  2 siblings, 1 reply; 19+ messages in thread
From: Roger Pau Monné @ 2022-04-25 14:11 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: xen-devel, Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	George Dunlap, Jan Beulich, Julien Grall, Stefano Stabellini,
	Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu

On Wed, Apr 13, 2022 at 09:41:51AM -0400, Tamas K Lengyel wrote:
> Allow specify distinct parts of the fork VM to be reset. This is useful when a
> fuzzing operation involves mapping in only a handful of pages that are known
> ahead of time. Throwing these pages away just to be re-copied immediately is
> expensive, thus allowing to specify partial resets can speed things up.
> 
> Also allow resetting to be initiated from vm_event responses as an
> optiomization.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> v4: No change
> v3: Rebase on simpler approach after dropping empty_p2m feature
> v2: address review comments and add more sanity checking
> ---
>  tools/include/xenctrl.h                |  3 ++-
>  tools/libs/ctrl/xc_memshr.c            |  7 ++++++-
>  xen/arch/x86/include/asm/mem_sharing.h |  9 +++++++++
>  xen/arch/x86/mm/mem_sharing.c          | 24 +++++++++++++++++++-----
>  xen/common/vm_event.c                  | 15 +++++++++++++++
>  xen/include/public/memory.h            |  4 +++-
>  xen/include/public/vm_event.h          |  8 ++++++++
>  7 files changed, 62 insertions(+), 8 deletions(-)
> 
> diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> index 95bd5eca67..1b089a2c02 100644
> --- a/tools/include/xenctrl.h
> +++ b/tools/include/xenctrl.h
> @@ -2290,7 +2290,8 @@ int xc_memshr_fork(xc_interface *xch,
>   *
>   * With VMs that have a lot of memory this call may block for a long time.
>   */
> -int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain);
> +int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain,
> +                         bool reset_state, bool reset_memory);
>  
>  /* Debug calls: return the number of pages referencing the shared frame backing
>   * the input argument. Should be one or greater.
> diff --git a/tools/libs/ctrl/xc_memshr.c b/tools/libs/ctrl/xc_memshr.c
> index a6cfd7dccf..a0d0b894e2 100644
> --- a/tools/libs/ctrl/xc_memshr.c
> +++ b/tools/libs/ctrl/xc_memshr.c
> @@ -257,12 +257,17 @@ int xc_memshr_fork(xc_interface *xch, uint32_t pdomid, uint32_t domid,
>      return xc_memshr_memop(xch, domid, &mso);
>  }
>  
> -int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid)
> +int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid, bool reset_state,
> +                         bool reset_memory)
>  {
>      xen_mem_sharing_op_t mso;
>  
>      memset(&mso, 0, sizeof(mso));
>      mso.op = XENMEM_sharing_op_fork_reset;
> +    if ( reset_state )
> +        mso.u.fork.flags |= XENMEM_FORK_RESET_STATE;
> +    if ( reset_memory )
> +        mso.u.fork.flags |= XENMEM_FORK_RESET_MEMORY;

IMO would be clearer to init mso fields at definition.

> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> index 84cf52636b..d26a6699fc 100644
> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -28,6 +28,11 @@
>  #include <asm/p2m.h>
>  #include <asm/monitor.h>
>  #include <asm/vm_event.h>
> +
> +#ifdef CONFIG_MEM_SHARING
> +#include <asm/mem_sharing.h>
> +#endif
> +
>  #include <xsm/xsm.h>
>  #include <public/hvm/params.h>
>  
> @@ -394,6 +399,16 @@ static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
>              if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
>                  p2m_mem_paging_resume(d, &rsp);
>  #endif
> +#ifdef CONFIG_MEM_SHARING
> +            if ( mem_sharing_is_fork(d) )
> +            {
> +                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
> +                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
> +
> +                if ( reset_state || reset_mem )
> +                    ASSERT(!mem_sharing_fork_reset(d, reset_state, reset_mem));

Might be appropriate to destroy the domain in case fork reset fails?
ASSERT will only help in debug builds.

> +            }
> +#endif
>  
>              /*
>               * Check emulation flags in the arch-specific handler only, as it
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index a1a0f0233a..f8d26fb77d 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
>                  uint32_t gref;     /* IN: gref to debug         */
>              } u;
>          } debug;
> -        struct mem_sharing_op_fork {      /* OP_FORK */
> +        struct mem_sharing_op_fork {      /* OP_FORK{,_RESET} */
>              domid_t parent_domain;        /* IN: parent's domain id */
>  /* Only makes sense for short-lived forks */
>  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
>  /* Only makes sense for short-lived forks */
>  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)

Should you add:

/* Only for OP_FORK_RESET. */

> +#define XENMEM_FORK_RESET_STATE        (1u << 2)
> +#define XENMEM_FORK_RESET_MEMORY       (1u << 3)

Thanks, Roger.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits
  2022-04-13 13:41 ` [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits Tamas K Lengyel
  2022-04-22 14:08   ` Tamas K Lengyel
@ 2022-04-25 14:41   ` Roger Pau Monné
  2022-04-25 15:40     ` Tamas K Lengyel
  1 sibling, 1 reply; 19+ messages in thread
From: Roger Pau Monné @ 2022-04-25 14:41 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: xen-devel, Wei Liu, Anthony PERARD, Juergen Gross, Andrew Cooper,
	George Dunlap, Jan Beulich, Julien Grall, Stefano Stabellini,
	Tamas K Lengyel, Alexandru Isaila, Petre Pircalabu, Jun Nakajima,
	Kevin Tian

On Wed, Apr 13, 2022 at 09:41:52AM -0400, Tamas K Lengyel wrote:
> Add monitor event that hooks the vmexit handler allowing for both sync and
> async monitoring of events. With async monitoring an event is placed on the
> monitor ring for each exit and the rest of the vmexit handler resumes normally.
> If there are additional monitor events configured those will also place their
> respective events on the monitor ring.
> 
> With the sync version an event is placed on the monitor ring but the handler
> does not get resumed, thus the sync version is only useful when the VM is not
> expected to resume normally after the vmexit. Our use-case is primarily with
> the sync version with VM forks where the fork gets reset after sync vmexit
> event, thus the rest of the vmexit handler can be safely skipped. This is
> very useful when we want to avoid Xen crashing the VM under any circumstance,
> for example during fuzzing. Collecting all vmexit information regardless of
> the root cause makes it easier to reason about the state of the VM on the
> monitor side, hence we opt to receive all events, even for external interrupt
> and NMI exits and let the monitor agent decide how to proceed.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> ---
> v4: Minor tweaks and more verbose patch description.
> 
> Note: making the sync version resume-friendly is specifically out-of-scope as
> it would require significant rearrangement of the vmexit handler. As this
> feature is not required for our use-case we opt for the version that minimizes
> impact on the existing code.
> ---
>  tools/include/xenctrl.h                |  2 ++
>  tools/libs/ctrl/xc_monitor.c           | 15 +++++++++++++++
>  xen/arch/x86/hvm/monitor.c             | 18 ++++++++++++++++++
>  xen/arch/x86/hvm/vmx/vmx.c             | 12 ++++++++++++
>  xen/arch/x86/include/asm/domain.h      |  2 ++
>  xen/arch/x86/include/asm/hvm/monitor.h |  2 ++
>  xen/arch/x86/include/asm/monitor.h     |  3 ++-
>  xen/arch/x86/monitor.c                 | 14 ++++++++++++++
>  xen/include/public/domctl.h            |  6 ++++++
>  xen/include/public/vm_event.h          |  8 ++++++++
>  10 files changed, 81 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> index 1b089a2c02..159eaac050 100644
> --- a/tools/include/xenctrl.h
> +++ b/tools/include/xenctrl.h
> @@ -2096,6 +2096,8 @@ int xc_monitor_privileged_call(xc_interface *xch, uint32_t domain_id,
>                                 bool enable);
>  int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
>                                    bool enable);
> +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> +                      bool sync);
>  /**
>   * This function enables / disables emulation for each REP for a
>   * REP-compatible instruction.
> diff --git a/tools/libs/ctrl/xc_monitor.c b/tools/libs/ctrl/xc_monitor.c
> index 4ac823e775..c5fa62ff30 100644
> --- a/tools/libs/ctrl/xc_monitor.c
> +++ b/tools/libs/ctrl/xc_monitor.c
> @@ -246,6 +246,21 @@ int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
>      return do_domctl(xch, &domctl);
>  }
>  
> +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> +                      bool sync)
> +{
> +    DECLARE_DOMCTL;
> +
> +    domctl.cmd = XEN_DOMCTL_monitor_op;
> +    domctl.domain = domain_id;
> +    domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
> +                                    : XEN_DOMCTL_MONITOR_OP_DISABLE;
> +    domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_VMEXIT;
> +    domctl.u.monitor_op.u.vmexit.sync = sync;
> +
> +    return do_domctl(xch, &domctl);
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
> index b44a1e1dfe..64a38e8fa7 100644
> --- a/xen/arch/x86/hvm/monitor.c
> +++ b/xen/arch/x86/hvm/monitor.c
> @@ -328,6 +328,24 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
>      return monitor_traps(curr, true, &req) >= 0;
>  }
>  
> +int hvm_monitor_vmexit(unsigned long exit_reason,
> +                       unsigned long exit_qualification)

Should this maybe live in vmx code or have 'vmx' in the name
somewhere, so that if an svm counterpart is added this doesn't need to
be renamed?

> +{
> +    struct vcpu *curr = current;
> +    struct arch_domain *ad = &curr->domain->arch;
> +    vm_event_request_t req = {};
> +
> +    ASSERT(ad->monitor.vmexit_enabled);
> +
> +    req.reason = VM_EVENT_REASON_VMEXIT;
> +    req.u.vmexit.reason = exit_reason;
> +    req.u.vmexit.qualification = exit_qualification;

You could set those fields at definition.

> +
> +    set_npt_base(curr, &req);
> +
> +    return monitor_traps(curr, ad->monitor.vmexit_sync, &req);
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index c075370f64..2794db46f9 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -4008,6 +4008,18 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>          }
>      }
>  
> +    if ( unlikely(currd->arch.monitor.vmexit_enabled) )
> +    {
> +        int rc;
> +
> +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
> +        rc = hvm_monitor_vmexit(exit_reason, exit_qualification);
> +        if ( rc < 0 )
> +            goto exit_and_crash;
> +        if ( rc )
> +            return;
> +    }

Just for my understanding, is there any reason to not do this before
updating the altp2m?  AFAICT the update of the active EPTP won't
affect the call to hvm_monitor_vmexit.

> +
>      /* XXX: This looks ugly, but we need a mechanism to ensure
>       * any pending vmresume has really happened
>       */
> diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> index e62e109598..855db352c0 100644
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -430,6 +430,8 @@ struct arch_domain
>           */
>          unsigned int inguest_pagefault_disabled                            : 1;
>          unsigned int control_register_values                               : 1;
> +        unsigned int vmexit_enabled                                        : 1;
> +        unsigned int vmexit_sync                                           : 1;
>          struct monitor_msr_bitmap *msr_bitmap;
>          uint64_t write_ctrlreg_mask[4];
>      } monitor;
> diff --git a/xen/arch/x86/include/asm/hvm/monitor.h b/xen/arch/x86/include/asm/hvm/monitor.h
> index a75cd8545c..639f6dfa37 100644
> --- a/xen/arch/x86/include/asm/hvm/monitor.h
> +++ b/xen/arch/x86/include/asm/hvm/monitor.h
> @@ -51,6 +51,8 @@ bool hvm_monitor_emul_unimplemented(void);
>  
>  bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
>                             uint16_t kind);
> +int hvm_monitor_vmexit(unsigned long exit_reason,
> +                       unsigned long exit_qualification);
>  
>  #endif /* __ASM_X86_HVM_MONITOR_H__ */
>  
> diff --git a/xen/arch/x86/include/asm/monitor.h b/xen/arch/x86/include/asm/monitor.h
> index 01c6d63bb9..d8d54c5f23 100644
> --- a/xen/arch/x86/include/asm/monitor.h
> +++ b/xen/arch/x86/include/asm/monitor.h
> @@ -89,7 +89,8 @@ static inline uint32_t arch_monitor_get_capabilities(struct domain *d)
>                      (1U << XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION) |
>                      (1U << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
>                      (1U << XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED) |
> -                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT));
> +                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT) |
> +                    (1U << XEN_DOMCTL_MONITOR_EVENT_VMEXIT));
>  
>      if ( hvm_is_singlestep_supported() )
>          capabilities |= (1U << XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP);
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index 3079726a8b..30ca71432c 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c
> @@ -332,6 +332,20 @@ int arch_monitor_domctl_event(struct domain *d,
>          break;
>      }
>  
> +    case XEN_DOMCTL_MONITOR_EVENT_VMEXIT:
> +    {
> +        bool old_status = ad->monitor.vmexit_enabled;
> +
> +        if ( unlikely(old_status == requested_status) )
> +            return -EEXIST;

What about if the requested status is the same as the current one, but
vmexit sync is not?

IOW, I'm not sure this check is helpful, and you could likely avoid
the old_status local variable.

> +
> +        domain_pause(d);
> +        ad->monitor.vmexit_enabled = requested_status;
> +        ad->monitor.vmexit_sync = mop->u.vmexit.sync;
> +        domain_unpause(d);
> +        break;
> +    }
> +
>      default:
>          /*
>           * Should not be reached unless arch_monitor_get_capabilities() is
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index b85e6170b0..4803ed7afc 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -1057,6 +1057,7 @@ struct xen_domctl_psr_cmt_op {
>  #define XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED    10
>  /* Enabled by default */
>  #define XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT     11
> +#define XEN_DOMCTL_MONITOR_EVENT_VMEXIT                12
>  
>  struct xen_domctl_monitor_op {
>      uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
> @@ -1107,6 +1108,11 @@ struct xen_domctl_monitor_op {
>              /* Pause vCPU until response */
>              uint8_t sync;
>          } debug_exception;
> +
> +        struct {
> +            /* Send event and don't process vmexit */
> +            uint8_t sync;
> +        } vmexit;
>      } u;
>  };
>  
> diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> index 81c2ee28cc..07f106f811 100644
> --- a/xen/include/public/vm_event.h
> +++ b/xen/include/public/vm_event.h
> @@ -175,6 +175,8 @@
>  #define VM_EVENT_REASON_DESCRIPTOR_ACCESS       13
>  /* Current instruction is not implemented by the emulator */
>  #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED      14
> +/* VMEXIT */
> +#define VM_EVENT_REASON_VMEXIT                  15
>  
>  /* Supported values for the vm_event_write_ctrlreg index. */
>  #define VM_EVENT_X86_CR0    0
> @@ -394,6 +396,11 @@ struct vm_event_emul_insn_data {
>      uint8_t data[16]; /* Has to be completely filled */
>  };
>  
> +struct vm_event_vmexit {
> +    uint64_t reason;
> +    uint64_t qualification;
> +};

You are exposing an Intel specific interface publicly here.  Might be
worth adding a note, and/or adding 'intel' or 'vmx' in the structure
name: vm_event_vmx_exit, so that a vm_event_svm_exit could also be
added in the future.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-25 14:11 ` Roger Pau Monné
@ 2022-04-25 15:24   ` Tamas K Lengyel
  2022-04-26  8:17     ` Roger Pau Monné
  0 siblings, 1 reply; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-25 15:24 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Tamas K Lengyel, xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Julien Grall, Stefano Stabellini, Alexandru Isaila,
	Petre Pircalabu

On Mon, Apr 25, 2022 at 10:12 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Wed, Apr 13, 2022 at 09:41:51AM -0400, Tamas K Lengyel wrote:
> > Allow specify distinct parts of the fork VM to be reset. This is useful when a
> > fuzzing operation involves mapping in only a handful of pages that are known
> > ahead of time. Throwing these pages away just to be re-copied immediately is
> > expensive, thus allowing to specify partial resets can speed things up.
> >
> > Also allow resetting to be initiated from vm_event responses as an
> > optiomization.
> >
> > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>
> Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thank you!

> > ---
> > v4: No change
> > v3: Rebase on simpler approach after dropping empty_p2m feature
> > v2: address review comments and add more sanity checking
> > ---
> >  tools/include/xenctrl.h                |  3 ++-
> >  tools/libs/ctrl/xc_memshr.c            |  7 ++++++-
> >  xen/arch/x86/include/asm/mem_sharing.h |  9 +++++++++
> >  xen/arch/x86/mm/mem_sharing.c          | 24 +++++++++++++++++++-----
> >  xen/common/vm_event.c                  | 15 +++++++++++++++
> >  xen/include/public/memory.h            |  4 +++-
> >  xen/include/public/vm_event.h          |  8 ++++++++
> >  7 files changed, 62 insertions(+), 8 deletions(-)
> >
> > diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> > index 95bd5eca67..1b089a2c02 100644
> > --- a/tools/include/xenctrl.h
> > +++ b/tools/include/xenctrl.h
> > @@ -2290,7 +2290,8 @@ int xc_memshr_fork(xc_interface *xch,
> >   *
> >   * With VMs that have a lot of memory this call may block for a long time.
> >   */
> > -int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain);
> > +int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain,
> > +                         bool reset_state, bool reset_memory);
> >
> >  /* Debug calls: return the number of pages referencing the shared frame backing
> >   * the input argument. Should be one or greater.
> > diff --git a/tools/libs/ctrl/xc_memshr.c b/tools/libs/ctrl/xc_memshr.c
> > index a6cfd7dccf..a0d0b894e2 100644
> > --- a/tools/libs/ctrl/xc_memshr.c
> > +++ b/tools/libs/ctrl/xc_memshr.c
> > @@ -257,12 +257,17 @@ int xc_memshr_fork(xc_interface *xch, uint32_t pdomid, uint32_t domid,
> >      return xc_memshr_memop(xch, domid, &mso);
> >  }
> >
> > -int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid)
> > +int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid, bool reset_state,
> > +                         bool reset_memory)
> >  {
> >      xen_mem_sharing_op_t mso;
> >
> >      memset(&mso, 0, sizeof(mso));
> >      mso.op = XENMEM_sharing_op_fork_reset;
> > +    if ( reset_state )
> > +        mso.u.fork.flags |= XENMEM_FORK_RESET_STATE;
> > +    if ( reset_memory )
> > +        mso.u.fork.flags |= XENMEM_FORK_RESET_MEMORY;
>
> IMO would be clearer to init mso fields at definition.

Not sure what you mean exactly, mso = { ... }; ? I think the logic is
pretty clear as-is and I don't have any preference for one style vs
the other.

>
> > diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> > index 84cf52636b..d26a6699fc 100644
> > --- a/xen/common/vm_event.c
> > +++ b/xen/common/vm_event.c
> > @@ -28,6 +28,11 @@
> >  #include <asm/p2m.h>
> >  #include <asm/monitor.h>
> >  #include <asm/vm_event.h>
> > +
> > +#ifdef CONFIG_MEM_SHARING
> > +#include <asm/mem_sharing.h>
> > +#endif
> > +
> >  #include <xsm/xsm.h>
> >  #include <public/hvm/params.h>
> >
> > @@ -394,6 +399,16 @@ static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
> >              if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
> >                  p2m_mem_paging_resume(d, &rsp);
> >  #endif
> > +#ifdef CONFIG_MEM_SHARING
> > +            if ( mem_sharing_is_fork(d) )
> > +            {
> > +                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
> > +                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
> > +
> > +                if ( reset_state || reset_mem )
> > +                    ASSERT(!mem_sharing_fork_reset(d, reset_state, reset_mem));
>
> Might be appropriate to destroy the domain in case fork reset fails?
> ASSERT will only help in debug builds.

No, I would prefer not destroying the domain here. If it ever becomes
necessary the right way would be to introduce a new monitor event to
signal an error and let the listener decide what to do. At the moment
I don't see that being necessary as there are no known scenarios where
we would be able to setup a fork but fail to reset is.

>
> > +            }
> > +#endif
> >
> >              /*
> >               * Check emulation flags in the arch-specific handler only, as it
> > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> > index a1a0f0233a..f8d26fb77d 100644
> > --- a/xen/include/public/memory.h
> > +++ b/xen/include/public/memory.h
> > @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> >                  uint32_t gref;     /* IN: gref to debug         */
> >              } u;
> >          } debug;
> > -        struct mem_sharing_op_fork {      /* OP_FORK */
> > +        struct mem_sharing_op_fork {      /* OP_FORK{,_RESET} */
> >              domid_t parent_domain;        /* IN: parent's domain id */
> >  /* Only makes sense for short-lived forks */
> >  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> >  /* Only makes sense for short-lived forks */
> >  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
>
> Should you add:
>
> /* Only for OP_FORK_RESET. */
>
> > +#define XENMEM_FORK_RESET_STATE        (1u << 2)
> > +#define XENMEM_FORK_RESET_MEMORY       (1u << 3)

I think the flag names are really descriptive already that these apply
to the FORK_RESET case but I would have no objection to that comment
being added at commit.

Thanks again,
Tamas


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits
  2022-04-25 14:41   ` Roger Pau Monné
@ 2022-04-25 15:40     ` Tamas K Lengyel
  2022-04-26  8:49       ` Roger Pau Monné
  0 siblings, 1 reply; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-25 15:40 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Tamas K Lengyel, xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Julien Grall, Stefano Stabellini, Alexandru Isaila,
	Petre Pircalabu, Jun Nakajima, Kevin Tian

On Mon, Apr 25, 2022 at 10:41 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Wed, Apr 13, 2022 at 09:41:52AM -0400, Tamas K Lengyel wrote:
> > Add monitor event that hooks the vmexit handler allowing for both sync and
> > async monitoring of events. With async monitoring an event is placed on the
> > monitor ring for each exit and the rest of the vmexit handler resumes normally.
> > If there are additional monitor events configured those will also place their
> > respective events on the monitor ring.
> >
> > With the sync version an event is placed on the monitor ring but the handler
> > does not get resumed, thus the sync version is only useful when the VM is not
> > expected to resume normally after the vmexit. Our use-case is primarily with
> > the sync version with VM forks where the fork gets reset after sync vmexit
> > event, thus the rest of the vmexit handler can be safely skipped. This is
> > very useful when we want to avoid Xen crashing the VM under any circumstance,
> > for example during fuzzing. Collecting all vmexit information regardless of
> > the root cause makes it easier to reason about the state of the VM on the
> > monitor side, hence we opt to receive all events, even for external interrupt
> > and NMI exits and let the monitor agent decide how to proceed.
> >
> > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > ---
> > v4: Minor tweaks and more verbose patch description.
> >
> > Note: making the sync version resume-friendly is specifically out-of-scope as
> > it would require significant rearrangement of the vmexit handler. As this
> > feature is not required for our use-case we opt for the version that minimizes
> > impact on the existing code.
> > ---
> >  tools/include/xenctrl.h                |  2 ++
> >  tools/libs/ctrl/xc_monitor.c           | 15 +++++++++++++++
> >  xen/arch/x86/hvm/monitor.c             | 18 ++++++++++++++++++
> >  xen/arch/x86/hvm/vmx/vmx.c             | 12 ++++++++++++
> >  xen/arch/x86/include/asm/domain.h      |  2 ++
> >  xen/arch/x86/include/asm/hvm/monitor.h |  2 ++
> >  xen/arch/x86/include/asm/monitor.h     |  3 ++-
> >  xen/arch/x86/monitor.c                 | 14 ++++++++++++++
> >  xen/include/public/domctl.h            |  6 ++++++
> >  xen/include/public/vm_event.h          |  8 ++++++++
> >  10 files changed, 81 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> > index 1b089a2c02..159eaac050 100644
> > --- a/tools/include/xenctrl.h
> > +++ b/tools/include/xenctrl.h
> > @@ -2096,6 +2096,8 @@ int xc_monitor_privileged_call(xc_interface *xch, uint32_t domain_id,
> >                                 bool enable);
> >  int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
> >                                    bool enable);
> > +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> > +                      bool sync);
> >  /**
> >   * This function enables / disables emulation for each REP for a
> >   * REP-compatible instruction.
> > diff --git a/tools/libs/ctrl/xc_monitor.c b/tools/libs/ctrl/xc_monitor.c
> > index 4ac823e775..c5fa62ff30 100644
> > --- a/tools/libs/ctrl/xc_monitor.c
> > +++ b/tools/libs/ctrl/xc_monitor.c
> > @@ -246,6 +246,21 @@ int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
> >      return do_domctl(xch, &domctl);
> >  }
> >
> > +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> > +                      bool sync)
> > +{
> > +    DECLARE_DOMCTL;
> > +
> > +    domctl.cmd = XEN_DOMCTL_monitor_op;
> > +    domctl.domain = domain_id;
> > +    domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
> > +                                    : XEN_DOMCTL_MONITOR_OP_DISABLE;
> > +    domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_VMEXIT;
> > +    domctl.u.monitor_op.u.vmexit.sync = sync;
> > +
> > +    return do_domctl(xch, &domctl);
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
> > index b44a1e1dfe..64a38e8fa7 100644
> > --- a/xen/arch/x86/hvm/monitor.c
> > +++ b/xen/arch/x86/hvm/monitor.c
> > @@ -328,6 +328,24 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
> >      return monitor_traps(curr, true, &req) >= 0;
> >  }
> >
> > +int hvm_monitor_vmexit(unsigned long exit_reason,
> > +                       unsigned long exit_qualification)
>
> Should this maybe live in vmx code or have 'vmx' in the name
> somewhere, so that if an svm counterpart is added this doesn't need to
> be renamed?

I don't follow. Why would this need to be renamed? I would presume the
same function would be used on both if it comes to that, perhaps with
a unified input structure if the two are not compatible as-is. In any
case, there is no vm_event/monitor support for AMD at all (not just
for this particular event type) and no plans on adding it any time
soon so IMHO we should cross that bridge when and if that becomes
necessary.

>
> > +{
> > +    struct vcpu *curr = current;
> > +    struct arch_domain *ad = &curr->domain->arch;
> > +    vm_event_request_t req = {};
> > +
> > +    ASSERT(ad->monitor.vmexit_enabled);
> > +
> > +    req.reason = VM_EVENT_REASON_VMEXIT;
> > +    req.u.vmexit.reason = exit_reason;
> > +    req.u.vmexit.qualification = exit_qualification;
>
> You could set those fields at definition.

Sure, but this is the established style throughout the file.

> > +
> > +    set_npt_base(curr, &req);
> > +
> > +    return monitor_traps(curr, ad->monitor.vmexit_sync, &req);
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> > index c075370f64..2794db46f9 100644
> > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > @@ -4008,6 +4008,18 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
> >          }
> >      }
> >
> > +    if ( unlikely(currd->arch.monitor.vmexit_enabled) )
> > +    {
> > +        int rc;
> > +
> > +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
> > +        rc = hvm_monitor_vmexit(exit_reason, exit_qualification);
> > +        if ( rc < 0 )
> > +            goto exit_and_crash;
> > +        if ( rc )
> > +            return;
> > +    }
>
> Just for my understanding, is there any reason to not do this before
> updating the altp2m?  AFAICT the update of the active EPTP won't
> affect the call to hvm_monitor_vmexit.

The currently active altp2m information is included in the vm_event
that will be sent out, so it is good to have the correct info for it.
I don't currently plan on using altp2m with this particular even type
but we should make sure it doesn't send out stale info in case someone
wants to use it differently. Certainly no point in sending the event
before it as the exit condition in the altp2m update blob is really
just dead code and can't actually be reached.

>
> > +
> >      /* XXX: This looks ugly, but we need a mechanism to ensure
> >       * any pending vmresume has really happened
> >       */
> > diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> > index e62e109598..855db352c0 100644
> > --- a/xen/arch/x86/include/asm/domain.h
> > +++ b/xen/arch/x86/include/asm/domain.h
> > @@ -430,6 +430,8 @@ struct arch_domain
> >           */
> >          unsigned int inguest_pagefault_disabled                            : 1;
> >          unsigned int control_register_values                               : 1;
> > +        unsigned int vmexit_enabled                                        : 1;
> > +        unsigned int vmexit_sync                                           : 1;
> >          struct monitor_msr_bitmap *msr_bitmap;
> >          uint64_t write_ctrlreg_mask[4];
> >      } monitor;
> > diff --git a/xen/arch/x86/include/asm/hvm/monitor.h b/xen/arch/x86/include/asm/hvm/monitor.h
> > index a75cd8545c..639f6dfa37 100644
> > --- a/xen/arch/x86/include/asm/hvm/monitor.h
> > +++ b/xen/arch/x86/include/asm/hvm/monitor.h
> > @@ -51,6 +51,8 @@ bool hvm_monitor_emul_unimplemented(void);
> >
> >  bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
> >                             uint16_t kind);
> > +int hvm_monitor_vmexit(unsigned long exit_reason,
> > +                       unsigned long exit_qualification);
> >
> >  #endif /* __ASM_X86_HVM_MONITOR_H__ */
> >
> > diff --git a/xen/arch/x86/include/asm/monitor.h b/xen/arch/x86/include/asm/monitor.h
> > index 01c6d63bb9..d8d54c5f23 100644
> > --- a/xen/arch/x86/include/asm/monitor.h
> > +++ b/xen/arch/x86/include/asm/monitor.h
> > @@ -89,7 +89,8 @@ static inline uint32_t arch_monitor_get_capabilities(struct domain *d)
> >                      (1U << XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION) |
> >                      (1U << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
> >                      (1U << XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED) |
> > -                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT));
> > +                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT) |
> > +                    (1U << XEN_DOMCTL_MONITOR_EVENT_VMEXIT));
> >
> >      if ( hvm_is_singlestep_supported() )
> >          capabilities |= (1U << XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP);
> > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> > index 3079726a8b..30ca71432c 100644
> > --- a/xen/arch/x86/monitor.c
> > +++ b/xen/arch/x86/monitor.c
> > @@ -332,6 +332,20 @@ int arch_monitor_domctl_event(struct domain *d,
> >          break;
> >      }
> >
> > +    case XEN_DOMCTL_MONITOR_EVENT_VMEXIT:
> > +    {
> > +        bool old_status = ad->monitor.vmexit_enabled;
> > +
> > +        if ( unlikely(old_status == requested_status) )
> > +            return -EEXIST;
>
> What about if the requested status is the same as the current one, but
> vmexit sync is not?

You need to clear the currently registered event first, then register
the new one.

> IOW, I'm not sure this check is helpful, and you could likely avoid
> the old_status local variable.

It is helpful on the callee side. Usually the callee needs to keep
track of the state of what events are enabled so that it can clean up
after itself and if it ever runs into trying to set the event to
something it's already set to then that indicates its state being
out-of-sync.

>
> > +
> > +        domain_pause(d);
> > +        ad->monitor.vmexit_enabled = requested_status;
> > +        ad->monitor.vmexit_sync = mop->u.vmexit.sync;
> > +        domain_unpause(d);
> > +        break;
> > +    }
> > +
> >      default:
> >          /*
> >           * Should not be reached unless arch_monitor_get_capabilities() is
> > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> > index b85e6170b0..4803ed7afc 100644
> > --- a/xen/include/public/domctl.h
> > +++ b/xen/include/public/domctl.h
> > @@ -1057,6 +1057,7 @@ struct xen_domctl_psr_cmt_op {
> >  #define XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED    10
> >  /* Enabled by default */
> >  #define XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT     11
> > +#define XEN_DOMCTL_MONITOR_EVENT_VMEXIT                12
> >
> >  struct xen_domctl_monitor_op {
> >      uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
> > @@ -1107,6 +1108,11 @@ struct xen_domctl_monitor_op {
> >              /* Pause vCPU until response */
> >              uint8_t sync;
> >          } debug_exception;
> > +
> > +        struct {
> > +            /* Send event and don't process vmexit */
> > +            uint8_t sync;
> > +        } vmexit;
> >      } u;
> >  };
> >
> > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > index 81c2ee28cc..07f106f811 100644
> > --- a/xen/include/public/vm_event.h
> > +++ b/xen/include/public/vm_event.h
> > @@ -175,6 +175,8 @@
> >  #define VM_EVENT_REASON_DESCRIPTOR_ACCESS       13
> >  /* Current instruction is not implemented by the emulator */
> >  #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED      14
> > +/* VMEXIT */
> > +#define VM_EVENT_REASON_VMEXIT                  15
> >
> >  /* Supported values for the vm_event_write_ctrlreg index. */
> >  #define VM_EVENT_X86_CR0    0
> > @@ -394,6 +396,11 @@ struct vm_event_emul_insn_data {
> >      uint8_t data[16]; /* Has to be completely filled */
> >  };
> >
> > +struct vm_event_vmexit {
> > +    uint64_t reason;
> > +    uint64_t qualification;
> > +};
>
> You are exposing an Intel specific interface publicly here.  Might be
> worth adding a note, and/or adding 'intel' or 'vmx' in the structure
> name: vm_event_vmx_exit, so that a vm_event_svm_exit could also be
> added in the future.

All vm_event monitor events are for vmx only right now. We can
certainly do that abstraction if and when someone decides to add svm
support, the ABI is versioned and no structure here is set in stone.
No guarantees are even implied for the structures to remain the same
in any way between one version of the ABI to the next. So with that I
don't see the need for complicating the structures at this time.

Tamas


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-25 15:24   ` Tamas K Lengyel
@ 2022-04-26  8:17     ` Roger Pau Monné
  2022-04-26  8:33       ` Julien Grall
  0 siblings, 1 reply; 19+ messages in thread
From: Roger Pau Monné @ 2022-04-26  8:17 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: Tamas K Lengyel, xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Julien Grall, Stefano Stabellini, Alexandru Isaila,
	Petre Pircalabu

On Mon, Apr 25, 2022 at 11:24:37AM -0400, Tamas K Lengyel wrote:
> On Mon, Apr 25, 2022 at 10:12 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> >
> > On Wed, Apr 13, 2022 at 09:41:51AM -0400, Tamas K Lengyel wrote:
> > > Allow specify distinct parts of the fork VM to be reset. This is useful when a
> > > fuzzing operation involves mapping in only a handful of pages that are known
> > > ahead of time. Throwing these pages away just to be re-copied immediately is
> > > expensive, thus allowing to specify partial resets can speed things up.
> > >
> > > Also allow resetting to be initiated from vm_event responses as an
> > > optiomization.
> > >
> > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> >
> > Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Thank you!
> 
> > > ---
> > > v4: No change
> > > v3: Rebase on simpler approach after dropping empty_p2m feature
> > > v2: address review comments and add more sanity checking
> > > ---
> > >  tools/include/xenctrl.h                |  3 ++-
> > >  tools/libs/ctrl/xc_memshr.c            |  7 ++++++-
> > >  xen/arch/x86/include/asm/mem_sharing.h |  9 +++++++++
> > >  xen/arch/x86/mm/mem_sharing.c          | 24 +++++++++++++++++++-----
> > >  xen/common/vm_event.c                  | 15 +++++++++++++++
> > >  xen/include/public/memory.h            |  4 +++-
> > >  xen/include/public/vm_event.h          |  8 ++++++++
> > >  7 files changed, 62 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> > > index 95bd5eca67..1b089a2c02 100644
> > > --- a/tools/include/xenctrl.h
> > > +++ b/tools/include/xenctrl.h
> > > @@ -2290,7 +2290,8 @@ int xc_memshr_fork(xc_interface *xch,
> > >   *
> > >   * With VMs that have a lot of memory this call may block for a long time.
> > >   */
> > > -int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain);
> > > +int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain,
> > > +                         bool reset_state, bool reset_memory);
> > >
> > >  /* Debug calls: return the number of pages referencing the shared frame backing
> > >   * the input argument. Should be one or greater.
> > > diff --git a/tools/libs/ctrl/xc_memshr.c b/tools/libs/ctrl/xc_memshr.c
> > > index a6cfd7dccf..a0d0b894e2 100644
> > > --- a/tools/libs/ctrl/xc_memshr.c
> > > +++ b/tools/libs/ctrl/xc_memshr.c
> > > @@ -257,12 +257,17 @@ int xc_memshr_fork(xc_interface *xch, uint32_t pdomid, uint32_t domid,
> > >      return xc_memshr_memop(xch, domid, &mso);
> > >  }
> > >
> > > -int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid)
> > > +int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid, bool reset_state,
> > > +                         bool reset_memory)
> > >  {
> > >      xen_mem_sharing_op_t mso;
> > >
> > >      memset(&mso, 0, sizeof(mso));
> > >      mso.op = XENMEM_sharing_op_fork_reset;
> > > +    if ( reset_state )
> > > +        mso.u.fork.flags |= XENMEM_FORK_RESET_STATE;
> > > +    if ( reset_memory )
> > > +        mso.u.fork.flags |= XENMEM_FORK_RESET_MEMORY;
> >
> > IMO would be clearer to init mso fields at definition.
> 
> Not sure what you mean exactly, mso = { ... }; ? I think the logic is
> pretty clear as-is and I don't have any preference for one style vs
> the other.

IMO it's clearer to initialize the fields at declaration using
mso = { ... } because then you avoid the memset.

> >
> > > diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> > > index 84cf52636b..d26a6699fc 100644
> > > --- a/xen/common/vm_event.c
> > > +++ b/xen/common/vm_event.c
> > > @@ -28,6 +28,11 @@
> > >  #include <asm/p2m.h>
> > >  #include <asm/monitor.h>
> > >  #include <asm/vm_event.h>
> > > +
> > > +#ifdef CONFIG_MEM_SHARING
> > > +#include <asm/mem_sharing.h>
> > > +#endif
> > > +
> > >  #include <xsm/xsm.h>
> > >  #include <public/hvm/params.h>
> > >
> > > @@ -394,6 +399,16 @@ static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
> > >              if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
> > >                  p2m_mem_paging_resume(d, &rsp);
> > >  #endif
> > > +#ifdef CONFIG_MEM_SHARING
> > > +            if ( mem_sharing_is_fork(d) )
> > > +            {
> > > +                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
> > > +                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
> > > +
> > > +                if ( reset_state || reset_mem )
> > > +                    ASSERT(!mem_sharing_fork_reset(d, reset_state, reset_mem));
> >
> > Might be appropriate to destroy the domain in case fork reset fails?
> > ASSERT will only help in debug builds.
> 
> No, I would prefer not destroying the domain here. If it ever becomes
> necessary the right way would be to introduce a new monitor event to
> signal an error and let the listener decide what to do. At the moment
> I don't see that being necessary as there are no known scenarios where
> we would be able to setup a fork but fail to reset is.

My concern for raising this was what would happen on non-debug
builds if mem_sharing_fork_reset() failed, and hence my request to
crash the domain.  I would have used something like:

if ( (reset_state || reset_mem) &&
     mem_sharing_fork_reset(d, reset_state, reset_mem) )
{
    ASSERT_UNREACHABLE();
    domain_crash(d);
    break;
}

But if you and other vm_event maintainers are fine with the current
approach and don't think it's a problem that's OK with me.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-26  8:17     ` Roger Pau Monné
@ 2022-04-26  8:33       ` Julien Grall
  2022-04-26 10:45         ` Tamas K Lengyel
  0 siblings, 1 reply; 19+ messages in thread
From: Julien Grall @ 2022-04-26  8:33 UTC (permalink / raw)
  To: Roger Pau Monné, Tamas K Lengyel
  Cc: Tamas K Lengyel, xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Stefano Stabellini, Alexandru Isaila, Petre Pircalabu

Hi,

On 26/04/2022 09:17, Roger Pau Monné wrote:
> On Mon, Apr 25, 2022 at 11:24:37AM -0400, Tamas K Lengyel wrote:
>> On Mon, Apr 25, 2022 at 10:12 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
>>>> index 84cf52636b..d26a6699fc 100644
>>>> --- a/xen/common/vm_event.c
>>>> +++ b/xen/common/vm_event.c
>>>> @@ -28,6 +28,11 @@
>>>>   #include <asm/p2m.h>
>>>>   #include <asm/monitor.h>
>>>>   #include <asm/vm_event.h>
>>>> +
>>>> +#ifdef CONFIG_MEM_SHARING
>>>> +#include <asm/mem_sharing.h>
>>>> +#endif
>>>> +
>>>>   #include <xsm/xsm.h>
>>>>   #include <public/hvm/params.h>
>>>>
>>>> @@ -394,6 +399,16 @@ static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
>>>>               if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
>>>>                   p2m_mem_paging_resume(d, &rsp);
>>>>   #endif
>>>> +#ifdef CONFIG_MEM_SHARING
>>>> +            if ( mem_sharing_is_fork(d) )
>>>> +            {
>>>> +                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
>>>> +                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
>>>> +
>>>> +                if ( reset_state || reset_mem )
>>>> +                    ASSERT(!mem_sharing_fork_reset(d, reset_state, reset_mem));
>>>
>>> Might be appropriate to destroy the domain in case fork reset fails?
>>> ASSERT will only help in debug builds.
>>
>> No, I would prefer not destroying the domain here. If it ever becomes
>> necessary the right way would be to introduce a new monitor event to
>> signal an error and let the listener decide what to do. At the moment
>> I don't see that being necessary as there are no known scenarios where
>> we would be able to setup a fork but fail to reset is.
> 
> My concern for raising this was what would happen on non-debug
> builds if mem_sharing_fork_reset() failed, and hence my request to
> crash the domain.  I would have used something like:
> 
> if ( (reset_state || reset_mem) &&
>       mem_sharing_fork_reset(d, reset_state, reset_mem) )
> {
>      ASSERT_UNREACHABLE();
>      domain_crash(d);
>      break;
> }
> 
> But if you and other vm_event maintainers are fine with the current
> approach and don't think it's a problem that's OK with me.

The current approach is actually not correct. On production build, 
ASSERT() will turn to NOP. IOW mem_sharing_fork_reset() *will* not be 
called.

So the call needs to move outside of the ASSERT() and use a construct 
similar to what you suggested:

if ( .... && mem_sharing_fork_reset(...) )
{
   ASSERT_UNREACHABLE();
   break;
}

Cheers,

-- 
Julien Grall


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits
  2022-04-25 15:40     ` Tamas K Lengyel
@ 2022-04-26  8:49       ` Roger Pau Monné
  2022-04-26 18:53         ` Tamas K Lengyel
  0 siblings, 1 reply; 19+ messages in thread
From: Roger Pau Monné @ 2022-04-26  8:49 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: Tamas K Lengyel, xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Julien Grall, Stefano Stabellini, Alexandru Isaila,
	Petre Pircalabu, Jun Nakajima, Kevin Tian

On Mon, Apr 25, 2022 at 11:40:11AM -0400, Tamas K Lengyel wrote:
> On Mon, Apr 25, 2022 at 10:41 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> >
> > On Wed, Apr 13, 2022 at 09:41:52AM -0400, Tamas K Lengyel wrote:
> > > Add monitor event that hooks the vmexit handler allowing for both sync and
> > > async monitoring of events. With async monitoring an event is placed on the
> > > monitor ring for each exit and the rest of the vmexit handler resumes normally.
> > > If there are additional monitor events configured those will also place their
> > > respective events on the monitor ring.
> > >
> > > With the sync version an event is placed on the monitor ring but the handler
> > > does not get resumed, thus the sync version is only useful when the VM is not
> > > expected to resume normally after the vmexit. Our use-case is primarily with
> > > the sync version with VM forks where the fork gets reset after sync vmexit
> > > event, thus the rest of the vmexit handler can be safely skipped. This is
> > > very useful when we want to avoid Xen crashing the VM under any circumstance,
> > > for example during fuzzing. Collecting all vmexit information regardless of
> > > the root cause makes it easier to reason about the state of the VM on the
> > > monitor side, hence we opt to receive all events, even for external interrupt
> > > and NMI exits and let the monitor agent decide how to proceed.
> > >
> > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > > ---
> > > v4: Minor tweaks and more verbose patch description.
> > >
> > > Note: making the sync version resume-friendly is specifically out-of-scope as
> > > it would require significant rearrangement of the vmexit handler. As this
> > > feature is not required for our use-case we opt for the version that minimizes
> > > impact on the existing code.
> > > ---
> > >  tools/include/xenctrl.h                |  2 ++
> > >  tools/libs/ctrl/xc_monitor.c           | 15 +++++++++++++++
> > >  xen/arch/x86/hvm/monitor.c             | 18 ++++++++++++++++++
> > >  xen/arch/x86/hvm/vmx/vmx.c             | 12 ++++++++++++
> > >  xen/arch/x86/include/asm/domain.h      |  2 ++
> > >  xen/arch/x86/include/asm/hvm/monitor.h |  2 ++
> > >  xen/arch/x86/include/asm/monitor.h     |  3 ++-
> > >  xen/arch/x86/monitor.c                 | 14 ++++++++++++++
> > >  xen/include/public/domctl.h            |  6 ++++++
> > >  xen/include/public/vm_event.h          |  8 ++++++++
> > >  10 files changed, 81 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> > > index 1b089a2c02..159eaac050 100644
> > > --- a/tools/include/xenctrl.h
> > > +++ b/tools/include/xenctrl.h
> > > @@ -2096,6 +2096,8 @@ int xc_monitor_privileged_call(xc_interface *xch, uint32_t domain_id,
> > >                                 bool enable);
> > >  int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
> > >                                    bool enable);
> > > +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> > > +                      bool sync);
> > >  /**
> > >   * This function enables / disables emulation for each REP for a
> > >   * REP-compatible instruction.
> > > diff --git a/tools/libs/ctrl/xc_monitor.c b/tools/libs/ctrl/xc_monitor.c
> > > index 4ac823e775..c5fa62ff30 100644
> > > --- a/tools/libs/ctrl/xc_monitor.c
> > > +++ b/tools/libs/ctrl/xc_monitor.c
> > > @@ -246,6 +246,21 @@ int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
> > >      return do_domctl(xch, &domctl);
> > >  }
> > >
> > > +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> > > +                      bool sync)
> > > +{
> > > +    DECLARE_DOMCTL;
> > > +
> > > +    domctl.cmd = XEN_DOMCTL_monitor_op;
> > > +    domctl.domain = domain_id;
> > > +    domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
> > > +                                    : XEN_DOMCTL_MONITOR_OP_DISABLE;
> > > +    domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_VMEXIT;
> > > +    domctl.u.monitor_op.u.vmexit.sync = sync;
> > > +
> > > +    return do_domctl(xch, &domctl);
> > > +}
> > > +
> > >  /*
> > >   * Local variables:
> > >   * mode: C
> > > diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
> > > index b44a1e1dfe..64a38e8fa7 100644
> > > --- a/xen/arch/x86/hvm/monitor.c
> > > +++ b/xen/arch/x86/hvm/monitor.c
> > > @@ -328,6 +328,24 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
> > >      return monitor_traps(curr, true, &req) >= 0;
> > >  }
> > >
> > > +int hvm_monitor_vmexit(unsigned long exit_reason,
> > > +                       unsigned long exit_qualification)
> >
> > Should this maybe live in vmx code or have 'vmx' in the name
> > somewhere, so that if an svm counterpart is added this doesn't need to
> > be renamed?
> 
> I don't follow. Why would this need to be renamed? I would presume the
> same function would be used on both if it comes to that, perhaps with
> a unified input structure if the two are not compatible as-is. In any
> case, there is no vm_event/monitor support for AMD at all (not just
> for this particular event type) and no plans on adding it any time
> soon so IMHO we should cross that bridge when and if that becomes
> necessary.

SVM has at least 3 fields related to vmexit information AFAICT:
exitcode, exitinfo1 and exitinfo2.

Instead of having an union in hvm_monitor_vmexit to cover all possible
vendor formats it might be easier to just have vmx_ and svm_ specific
functions, so it's contained in vendor specific code.

Or maybe that would be worse because you would have to expose a lot of
vm_event logic to vendor specific code in order to put the request on
the ring?

> >
> > > +{
> > > +    struct vcpu *curr = current;
> > > +    struct arch_domain *ad = &curr->domain->arch;
> > > +    vm_event_request_t req = {};
> > > +
> > > +    ASSERT(ad->monitor.vmexit_enabled);
> > > +
> > > +    req.reason = VM_EVENT_REASON_VMEXIT;
> > > +    req.u.vmexit.reason = exit_reason;
> > > +    req.u.vmexit.qualification = exit_qualification;
> >
> > You could set those fields at definition.
> 
> Sure, but this is the established style throughout the file.
> 
> > > +
> > > +    set_npt_base(curr, &req);
> > > +
> > > +    return monitor_traps(curr, ad->monitor.vmexit_sync, &req);
> > > +}
> > > +
> > >  /*
> > >   * Local variables:
> > >   * mode: C
> > > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> > > index c075370f64..2794db46f9 100644
> > > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > > @@ -4008,6 +4008,18 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
> > >          }
> > >      }
> > >
> > > +    if ( unlikely(currd->arch.monitor.vmexit_enabled) )
> > > +    {
> > > +        int rc;
> > > +
> > > +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
> > > +        rc = hvm_monitor_vmexit(exit_reason, exit_qualification);
> > > +        if ( rc < 0 )
> > > +            goto exit_and_crash;
> > > +        if ( rc )
> > > +            return;
> > > +    }
> >
> > Just for my understanding, is there any reason to not do this before
> > updating the altp2m?  AFAICT the update of the active EPTP won't
> > affect the call to hvm_monitor_vmexit.
> 
> The currently active altp2m information is included in the vm_event
> that will be sent out, so it is good to have the correct info for it.
> I don't currently plan on using altp2m with this particular even type
> but we should make sure it doesn't send out stale info in case someone
> wants to use it differently. Certainly no point in sending the event
> before it as the exit condition in the altp2m update blob is really
> just dead code and can't actually be reached.

Ack, thanks for the explanation.

> >
> > > +
> > >      /* XXX: This looks ugly, but we need a mechanism to ensure
> > >       * any pending vmresume has really happened
> > >       */
> > > diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> > > index e62e109598..855db352c0 100644
> > > --- a/xen/arch/x86/include/asm/domain.h
> > > +++ b/xen/arch/x86/include/asm/domain.h
> > > @@ -430,6 +430,8 @@ struct arch_domain
> > >           */
> > >          unsigned int inguest_pagefault_disabled                            : 1;
> > >          unsigned int control_register_values                               : 1;
> > > +        unsigned int vmexit_enabled                                        : 1;
> > > +        unsigned int vmexit_sync                                           : 1;
> > >          struct monitor_msr_bitmap *msr_bitmap;
> > >          uint64_t write_ctrlreg_mask[4];
> > >      } monitor;
> > > diff --git a/xen/arch/x86/include/asm/hvm/monitor.h b/xen/arch/x86/include/asm/hvm/monitor.h
> > > index a75cd8545c..639f6dfa37 100644
> > > --- a/xen/arch/x86/include/asm/hvm/monitor.h
> > > +++ b/xen/arch/x86/include/asm/hvm/monitor.h
> > > @@ -51,6 +51,8 @@ bool hvm_monitor_emul_unimplemented(void);
> > >
> > >  bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
> > >                             uint16_t kind);
> > > +int hvm_monitor_vmexit(unsigned long exit_reason,
> > > +                       unsigned long exit_qualification);
> > >
> > >  #endif /* __ASM_X86_HVM_MONITOR_H__ */
> > >
> > > diff --git a/xen/arch/x86/include/asm/monitor.h b/xen/arch/x86/include/asm/monitor.h
> > > index 01c6d63bb9..d8d54c5f23 100644
> > > --- a/xen/arch/x86/include/asm/monitor.h
> > > +++ b/xen/arch/x86/include/asm/monitor.h
> > > @@ -89,7 +89,8 @@ static inline uint32_t arch_monitor_get_capabilities(struct domain *d)
> > >                      (1U << XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION) |
> > >                      (1U << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
> > >                      (1U << XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED) |
> > > -                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT));
> > > +                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT) |
> > > +                    (1U << XEN_DOMCTL_MONITOR_EVENT_VMEXIT));
> > >
> > >      if ( hvm_is_singlestep_supported() )
> > >          capabilities |= (1U << XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP);
> > > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> > > index 3079726a8b..30ca71432c 100644
> > > --- a/xen/arch/x86/monitor.c
> > > +++ b/xen/arch/x86/monitor.c
> > > @@ -332,6 +332,20 @@ int arch_monitor_domctl_event(struct domain *d,
> > >          break;
> > >      }
> > >
> > > +    case XEN_DOMCTL_MONITOR_EVENT_VMEXIT:
> > > +    {
> > > +        bool old_status = ad->monitor.vmexit_enabled;
> > > +
> > > +        if ( unlikely(old_status == requested_status) )
> > > +            return -EEXIST;
> >
> > What about if the requested status is the same as the current one, but
> > vmexit sync is not?
> 
> You need to clear the currently registered event first, then register
> the new one.
> 
> > IOW, I'm not sure this check is helpful, and you could likely avoid
> > the old_status local variable.
> 
> It is helpful on the callee side. Usually the callee needs to keep
> track of the state of what events are enabled so that it can clean up
> after itself and if it ever runs into trying to set the event to
> something it's already set to then that indicates its state being
> out-of-sync.

Hm, right.  I wonder if you should also check that the ring is empty
before changing the status?  So that the callee doesn't change the
status while requests are still pending on the ring from the previous
type?

> >
> > > +
> > > +        domain_pause(d);
> > > +        ad->monitor.vmexit_enabled = requested_status;
> > > +        ad->monitor.vmexit_sync = mop->u.vmexit.sync;
> > > +        domain_unpause(d);
> > > +        break;
> > > +    }
> > > +
> > >      default:
> > >          /*
> > >           * Should not be reached unless arch_monitor_get_capabilities() is
> > > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> > > index b85e6170b0..4803ed7afc 100644
> > > --- a/xen/include/public/domctl.h
> > > +++ b/xen/include/public/domctl.h
> > > @@ -1057,6 +1057,7 @@ struct xen_domctl_psr_cmt_op {
> > >  #define XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED    10
> > >  /* Enabled by default */
> > >  #define XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT     11
> > > +#define XEN_DOMCTL_MONITOR_EVENT_VMEXIT                12
> > >
> > >  struct xen_domctl_monitor_op {
> > >      uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
> > > @@ -1107,6 +1108,11 @@ struct xen_domctl_monitor_op {
> > >              /* Pause vCPU until response */
> > >              uint8_t sync;
> > >          } debug_exception;
> > > +
> > > +        struct {
> > > +            /* Send event and don't process vmexit */
> > > +            uint8_t sync;
> > > +        } vmexit;
> > >      } u;
> > >  };
> > >
> > > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > > index 81c2ee28cc..07f106f811 100644
> > > --- a/xen/include/public/vm_event.h
> > > +++ b/xen/include/public/vm_event.h
> > > @@ -175,6 +175,8 @@
> > >  #define VM_EVENT_REASON_DESCRIPTOR_ACCESS       13
> > >  /* Current instruction is not implemented by the emulator */
> > >  #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED      14
> > > +/* VMEXIT */
> > > +#define VM_EVENT_REASON_VMEXIT                  15
> > >
> > >  /* Supported values for the vm_event_write_ctrlreg index. */
> > >  #define VM_EVENT_X86_CR0    0
> > > @@ -394,6 +396,11 @@ struct vm_event_emul_insn_data {
> > >      uint8_t data[16]; /* Has to be completely filled */
> > >  };
> > >
> > > +struct vm_event_vmexit {
> > > +    uint64_t reason;
> > > +    uint64_t qualification;
> > > +};
> >
> > You are exposing an Intel specific interface publicly here.  Might be
> > worth adding a note, and/or adding 'intel' or 'vmx' in the structure
> > name: vm_event_vmx_exit, so that a vm_event_svm_exit could also be
> > added in the future.
> 
> All vm_event monitor events are for vmx only right now. We can
> certainly do that abstraction if and when someone decides to add svm
> support, the ABI is versioned and no structure here is set in stone.
> No guarantees are even implied for the structures to remain the same
> in any way between one version of the ABI to the next. So with that I
> don't see the need for complicating the structures at this time.

Well, it's just altering the name slightly, but the structure layout
would be the same.  Just so that someone wanting to add SVM support
doesn't have to go and rename the VMX specific structures.  I think it
also makes it easier to identify what's vendor specific and what
should be shared between vendors.

I don't think it adds any complications to the code you are adding,
but would make it easier for someone wanting to add a new vendor
support in the future.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable
  2022-04-26  8:33       ` Julien Grall
@ 2022-04-26 10:45         ` Tamas K Lengyel
  0 siblings, 0 replies; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-26 10:45 UTC (permalink / raw)
  To: Julien Grall
  Cc: Roger Pau Monné,
	Tamas K Lengyel, Xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Stefano Stabellini, Alexandru Isaila, Petre Pircalabu

[-- Attachment #1: Type: text/plain, Size: 2941 bytes --]

On Tue, Apr 26, 2022, 4:33 AM Julien Grall <julien@xen.org> wrote:

> Hi,
>
> On 26/04/2022 09:17, Roger Pau Monné wrote:
> > On Mon, Apr 25, 2022 at 11:24:37AM -0400, Tamas K Lengyel wrote:
> >> On Mon, Apr 25, 2022 at 10:12 AM Roger Pau Monné <roger.pau@citrix.com>
> wrote:
> >>>> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> >>>> index 84cf52636b..d26a6699fc 100644
> >>>> --- a/xen/common/vm_event.c
> >>>> +++ b/xen/common/vm_event.c
> >>>> @@ -28,6 +28,11 @@
> >>>>   #include <asm/p2m.h>
> >>>>   #include <asm/monitor.h>
> >>>>   #include <asm/vm_event.h>
> >>>> +
> >>>> +#ifdef CONFIG_MEM_SHARING
> >>>> +#include <asm/mem_sharing.h>
> >>>> +#endif
> >>>> +
> >>>>   #include <xsm/xsm.h>
> >>>>   #include <public/hvm/params.h>
> >>>>
> >>>> @@ -394,6 +399,16 @@ static int vm_event_resume(struct domain *d,
> struct vm_event_domain *ved)
> >>>>               if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
> >>>>                   p2m_mem_paging_resume(d, &rsp);
> >>>>   #endif
> >>>> +#ifdef CONFIG_MEM_SHARING
> >>>> +            if ( mem_sharing_is_fork(d) )
> >>>> +            {
> >>>> +                bool reset_state = rsp.flags &
> VM_EVENT_FLAG_RESET_FORK_STATE;
> >>>> +                bool reset_mem = rsp.flags &
> VM_EVENT_FLAG_RESET_FORK_MEMORY;
> >>>> +
> >>>> +                if ( reset_state || reset_mem )
> >>>> +                    ASSERT(!mem_sharing_fork_reset(d, reset_state,
> reset_mem));
> >>>
> >>> Might be appropriate to destroy the domain in case fork reset fails?
> >>> ASSERT will only help in debug builds.
> >>
> >> No, I would prefer not destroying the domain here. If it ever becomes
> >> necessary the right way would be to introduce a new monitor event to
> >> signal an error and let the listener decide what to do. At the moment
> >> I don't see that being necessary as there are no known scenarios where
> >> we would be able to setup a fork but fail to reset is.
> >
> > My concern for raising this was what would happen on non-debug
> > builds if mem_sharing_fork_reset() failed, and hence my request to
> > crash the domain.  I would have used something like:
> >
> > if ( (reset_state || reset_mem) &&
> >       mem_sharing_fork_reset(d, reset_state, reset_mem) )
> > {
> >      ASSERT_UNREACHABLE();
> >      domain_crash(d);
> >      break;
> > }
> >
> > But if you and other vm_event maintainers are fine with the current
> > approach and don't think it's a problem that's OK with me.
>
> The current approach is actually not correct. On production build,
> ASSERT() will turn to NOP. IOW mem_sharing_fork_reset() *will* not be
> called.
>
> So the call needs to move outside of the ASSERT() and use a construct
> similar to what you suggested:
>
> if ( .... && mem_sharing_fork_reset(...) )
> {
>    ASSERT_UNREACHABLE();
>    break;
> }
>

Ah, good call. Thanks!

Tamas

>

[-- Attachment #2: Type: text/html, Size: 4303 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits
  2022-04-26  8:49       ` Roger Pau Monné
@ 2022-04-26 18:53         ` Tamas K Lengyel
  2022-04-27  9:22           ` Roger Pau Monné
  0 siblings, 1 reply; 19+ messages in thread
From: Tamas K Lengyel @ 2022-04-26 18:53 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Tamas K Lengyel, xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Julien Grall, Stefano Stabellini, Alexandru Isaila,
	Petre Pircalabu, Jun Nakajima, Kevin Tian

On Tue, Apr 26, 2022 at 4:50 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Mon, Apr 25, 2022 at 11:40:11AM -0400, Tamas K Lengyel wrote:
> > On Mon, Apr 25, 2022 at 10:41 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> > >
> > > On Wed, Apr 13, 2022 at 09:41:52AM -0400, Tamas K Lengyel wrote:
> > > > Add monitor event that hooks the vmexit handler allowing for both sync and
> > > > async monitoring of events. With async monitoring an event is placed on the
> > > > monitor ring for each exit and the rest of the vmexit handler resumes normally.
> > > > If there are additional monitor events configured those will also place their
> > > > respective events on the monitor ring.
> > > >
> > > > With the sync version an event is placed on the monitor ring but the handler
> > > > does not get resumed, thus the sync version is only useful when the VM is not
> > > > expected to resume normally after the vmexit. Our use-case is primarily with
> > > > the sync version with VM forks where the fork gets reset after sync vmexit
> > > > event, thus the rest of the vmexit handler can be safely skipped. This is
> > > > very useful when we want to avoid Xen crashing the VM under any circumstance,
> > > > for example during fuzzing. Collecting all vmexit information regardless of
> > > > the root cause makes it easier to reason about the state of the VM on the
> > > > monitor side, hence we opt to receive all events, even for external interrupt
> > > > and NMI exits and let the monitor agent decide how to proceed.
> > > >
> > > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > > > ---
> > > > v4: Minor tweaks and more verbose patch description.
> > > >
> > > > Note: making the sync version resume-friendly is specifically out-of-scope as
> > > > it would require significant rearrangement of the vmexit handler. As this
> > > > feature is not required for our use-case we opt for the version that minimizes
> > > > impact on the existing code.
> > > > ---
> > > >  tools/include/xenctrl.h                |  2 ++
> > > >  tools/libs/ctrl/xc_monitor.c           | 15 +++++++++++++++
> > > >  xen/arch/x86/hvm/monitor.c             | 18 ++++++++++++++++++
> > > >  xen/arch/x86/hvm/vmx/vmx.c             | 12 ++++++++++++
> > > >  xen/arch/x86/include/asm/domain.h      |  2 ++
> > > >  xen/arch/x86/include/asm/hvm/monitor.h |  2 ++
> > > >  xen/arch/x86/include/asm/monitor.h     |  3 ++-
> > > >  xen/arch/x86/monitor.c                 | 14 ++++++++++++++
> > > >  xen/include/public/domctl.h            |  6 ++++++
> > > >  xen/include/public/vm_event.h          |  8 ++++++++
> > > >  10 files changed, 81 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> > > > index 1b089a2c02..159eaac050 100644
> > > > --- a/tools/include/xenctrl.h
> > > > +++ b/tools/include/xenctrl.h
> > > > @@ -2096,6 +2096,8 @@ int xc_monitor_privileged_call(xc_interface *xch, uint32_t domain_id,
> > > >                                 bool enable);
> > > >  int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
> > > >                                    bool enable);
> > > > +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> > > > +                      bool sync);
> > > >  /**
> > > >   * This function enables / disables emulation for each REP for a
> > > >   * REP-compatible instruction.
> > > > diff --git a/tools/libs/ctrl/xc_monitor.c b/tools/libs/ctrl/xc_monitor.c
> > > > index 4ac823e775..c5fa62ff30 100644
> > > > --- a/tools/libs/ctrl/xc_monitor.c
> > > > +++ b/tools/libs/ctrl/xc_monitor.c
> > > > @@ -246,6 +246,21 @@ int xc_monitor_emul_unimplemented(xc_interface *xch, uint32_t domain_id,
> > > >      return do_domctl(xch, &domctl);
> > > >  }
> > > >
> > > > +int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
> > > > +                      bool sync)
> > > > +{
> > > > +    DECLARE_DOMCTL;
> > > > +
> > > > +    domctl.cmd = XEN_DOMCTL_monitor_op;
> > > > +    domctl.domain = domain_id;
> > > > +    domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
> > > > +                                    : XEN_DOMCTL_MONITOR_OP_DISABLE;
> > > > +    domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_VMEXIT;
> > > > +    domctl.u.monitor_op.u.vmexit.sync = sync;
> > > > +
> > > > +    return do_domctl(xch, &domctl);
> > > > +}
> > > > +
> > > >  /*
> > > >   * Local variables:
> > > >   * mode: C
> > > > diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
> > > > index b44a1e1dfe..64a38e8fa7 100644
> > > > --- a/xen/arch/x86/hvm/monitor.c
> > > > +++ b/xen/arch/x86/hvm/monitor.c
> > > > @@ -328,6 +328,24 @@ bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
> > > >      return monitor_traps(curr, true, &req) >= 0;
> > > >  }
> > > >
> > > > +int hvm_monitor_vmexit(unsigned long exit_reason,
> > > > +                       unsigned long exit_qualification)
> > >
> > > Should this maybe live in vmx code or have 'vmx' in the name
> > > somewhere, so that if an svm counterpart is added this doesn't need to
> > > be renamed?
> >
> > I don't follow. Why would this need to be renamed? I would presume the
> > same function would be used on both if it comes to that, perhaps with
> > a unified input structure if the two are not compatible as-is. In any
> > case, there is no vm_event/monitor support for AMD at all (not just
> > for this particular event type) and no plans on adding it any time
> > soon so IMHO we should cross that bridge when and if that becomes
> > necessary.
>
> SVM has at least 3 fields related to vmexit information AFAICT:
> exitcode, exitinfo1 and exitinfo2.
>
> Instead of having an union in hvm_monitor_vmexit to cover all possible
> vendor formats it might be easier to just have vmx_ and svm_ specific
> functions, so it's contained in vendor specific code.
>
> Or maybe that would be worse because you would have to expose a lot of
> vm_event logic to vendor specific code in order to put the request on
> the ring?

I would say it would be worse, I rather not have this code be littered
all over the place unless it must be.

>
> > >
> > > > +{
> > > > +    struct vcpu *curr = current;
> > > > +    struct arch_domain *ad = &curr->domain->arch;
> > > > +    vm_event_request_t req = {};
> > > > +
> > > > +    ASSERT(ad->monitor.vmexit_enabled);
> > > > +
> > > > +    req.reason = VM_EVENT_REASON_VMEXIT;
> > > > +    req.u.vmexit.reason = exit_reason;
> > > > +    req.u.vmexit.qualification = exit_qualification;
> > >
> > > You could set those fields at definition.
> >
> > Sure, but this is the established style throughout the file.
> >
> > > > +
> > > > +    set_npt_base(curr, &req);
> > > > +
> > > > +    return monitor_traps(curr, ad->monitor.vmexit_sync, &req);
> > > > +}
> > > > +
> > > >  /*
> > > >   * Local variables:
> > > >   * mode: C
> > > > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> > > > index c075370f64..2794db46f9 100644
> > > > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > > > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > > > @@ -4008,6 +4008,18 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
> > > >          }
> > > >      }
> > > >
> > > > +    if ( unlikely(currd->arch.monitor.vmexit_enabled) )
> > > > +    {
> > > > +        int rc;
> > > > +
> > > > +        __vmread(EXIT_QUALIFICATION, &exit_qualification);
> > > > +        rc = hvm_monitor_vmexit(exit_reason, exit_qualification);
> > > > +        if ( rc < 0 )
> > > > +            goto exit_and_crash;
> > > > +        if ( rc )
> > > > +            return;
> > > > +    }
> > >
> > > Just for my understanding, is there any reason to not do this before
> > > updating the altp2m?  AFAICT the update of the active EPTP won't
> > > affect the call to hvm_monitor_vmexit.
> >
> > The currently active altp2m information is included in the vm_event
> > that will be sent out, so it is good to have the correct info for it.
> > I don't currently plan on using altp2m with this particular even type
> > but we should make sure it doesn't send out stale info in case someone
> > wants to use it differently. Certainly no point in sending the event
> > before it as the exit condition in the altp2m update blob is really
> > just dead code and can't actually be reached.
>
> Ack, thanks for the explanation.
>
> > >
> > > > +
> > > >      /* XXX: This looks ugly, but we need a mechanism to ensure
> > > >       * any pending vmresume has really happened
> > > >       */
> > > > diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> > > > index e62e109598..855db352c0 100644
> > > > --- a/xen/arch/x86/include/asm/domain.h
> > > > +++ b/xen/arch/x86/include/asm/domain.h
> > > > @@ -430,6 +430,8 @@ struct arch_domain
> > > >           */
> > > >          unsigned int inguest_pagefault_disabled                            : 1;
> > > >          unsigned int control_register_values                               : 1;
> > > > +        unsigned int vmexit_enabled                                        : 1;
> > > > +        unsigned int vmexit_sync                                           : 1;
> > > >          struct monitor_msr_bitmap *msr_bitmap;
> > > >          uint64_t write_ctrlreg_mask[4];
> > > >      } monitor;
> > > > diff --git a/xen/arch/x86/include/asm/hvm/monitor.h b/xen/arch/x86/include/asm/hvm/monitor.h
> > > > index a75cd8545c..639f6dfa37 100644
> > > > --- a/xen/arch/x86/include/asm/hvm/monitor.h
> > > > +++ b/xen/arch/x86/include/asm/hvm/monitor.h
> > > > @@ -51,6 +51,8 @@ bool hvm_monitor_emul_unimplemented(void);
> > > >
> > > >  bool hvm_monitor_check_p2m(unsigned long gla, gfn_t gfn, uint32_t pfec,
> > > >                             uint16_t kind);
> > > > +int hvm_monitor_vmexit(unsigned long exit_reason,
> > > > +                       unsigned long exit_qualification);
> > > >
> > > >  #endif /* __ASM_X86_HVM_MONITOR_H__ */
> > > >
> > > > diff --git a/xen/arch/x86/include/asm/monitor.h b/xen/arch/x86/include/asm/monitor.h
> > > > index 01c6d63bb9..d8d54c5f23 100644
> > > > --- a/xen/arch/x86/include/asm/monitor.h
> > > > +++ b/xen/arch/x86/include/asm/monitor.h
> > > > @@ -89,7 +89,8 @@ static inline uint32_t arch_monitor_get_capabilities(struct domain *d)
> > > >                      (1U << XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION) |
> > > >                      (1U << XEN_DOMCTL_MONITOR_EVENT_WRITE_CTRLREG) |
> > > >                      (1U << XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED) |
> > > > -                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT));
> > > > +                    (1U << XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT) |
> > > > +                    (1U << XEN_DOMCTL_MONITOR_EVENT_VMEXIT));
> > > >
> > > >      if ( hvm_is_singlestep_supported() )
> > > >          capabilities |= (1U << XEN_DOMCTL_MONITOR_EVENT_SINGLESTEP);
> > > > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> > > > index 3079726a8b..30ca71432c 100644
> > > > --- a/xen/arch/x86/monitor.c
> > > > +++ b/xen/arch/x86/monitor.c
> > > > @@ -332,6 +332,20 @@ int arch_monitor_domctl_event(struct domain *d,
> > > >          break;
> > > >      }
> > > >
> > > > +    case XEN_DOMCTL_MONITOR_EVENT_VMEXIT:
> > > > +    {
> > > > +        bool old_status = ad->monitor.vmexit_enabled;
> > > > +
> > > > +        if ( unlikely(old_status == requested_status) )
> > > > +            return -EEXIST;
> > >
> > > What about if the requested status is the same as the current one, but
> > > vmexit sync is not?
> >
> > You need to clear the currently registered event first, then register
> > the new one.
> >
> > > IOW, I'm not sure this check is helpful, and you could likely avoid
> > > the old_status local variable.
> >
> > It is helpful on the callee side. Usually the callee needs to keep
> > track of the state of what events are enabled so that it can clean up
> > after itself and if it ever runs into trying to set the event to
> > something it's already set to then that indicates its state being
> > out-of-sync.
>
> Hm, right.  I wonder if you should also check that the ring is empty
> before changing the status?  So that the callee doesn't change the
> status while requests are still pending on the ring from the previous
> type?

No, that becomes tricky because really the only way to ensure the ring
remains empty from the userspace is to pause the domain, which is very
heavy handed. There is nothing wrong with asking Xen not to produce
more of a certain type of request while still being able to handle the
ones that are already on the ring. For setups where the two should
happen at the same time is where the toolstack first pauses the
domain, clears the ring, then disables the event. Both are valid
approaches.

>
> > >
> > > > +
> > > > +        domain_pause(d);
> > > > +        ad->monitor.vmexit_enabled = requested_status;
> > > > +        ad->monitor.vmexit_sync = mop->u.vmexit.sync;
> > > > +        domain_unpause(d);
> > > > +        break;
> > > > +    }
> > > > +
> > > >      default:
> > > >          /*
> > > >           * Should not be reached unless arch_monitor_get_capabilities() is
> > > > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> > > > index b85e6170b0..4803ed7afc 100644
> > > > --- a/xen/include/public/domctl.h
> > > > +++ b/xen/include/public/domctl.h
> > > > @@ -1057,6 +1057,7 @@ struct xen_domctl_psr_cmt_op {
> > > >  #define XEN_DOMCTL_MONITOR_EVENT_EMUL_UNIMPLEMENTED    10
> > > >  /* Enabled by default */
> > > >  #define XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT     11
> > > > +#define XEN_DOMCTL_MONITOR_EVENT_VMEXIT                12
> > > >
> > > >  struct xen_domctl_monitor_op {
> > > >      uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
> > > > @@ -1107,6 +1108,11 @@ struct xen_domctl_monitor_op {
> > > >              /* Pause vCPU until response */
> > > >              uint8_t sync;
> > > >          } debug_exception;
> > > > +
> > > > +        struct {
> > > > +            /* Send event and don't process vmexit */
> > > > +            uint8_t sync;
> > > > +        } vmexit;
> > > >      } u;
> > > >  };
> > > >
> > > > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > > > index 81c2ee28cc..07f106f811 100644
> > > > --- a/xen/include/public/vm_event.h
> > > > +++ b/xen/include/public/vm_event.h
> > > > @@ -175,6 +175,8 @@
> > > >  #define VM_EVENT_REASON_DESCRIPTOR_ACCESS       13
> > > >  /* Current instruction is not implemented by the emulator */
> > > >  #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED      14
> > > > +/* VMEXIT */
> > > > +#define VM_EVENT_REASON_VMEXIT                  15
> > > >
> > > >  /* Supported values for the vm_event_write_ctrlreg index. */
> > > >  #define VM_EVENT_X86_CR0    0
> > > > @@ -394,6 +396,11 @@ struct vm_event_emul_insn_data {
> > > >      uint8_t data[16]; /* Has to be completely filled */
> > > >  };
> > > >
> > > > +struct vm_event_vmexit {
> > > > +    uint64_t reason;
> > > > +    uint64_t qualification;
> > > > +};
> > >
> > > You are exposing an Intel specific interface publicly here.  Might be
> > > worth adding a note, and/or adding 'intel' or 'vmx' in the structure
> > > name: vm_event_vmx_exit, so that a vm_event_svm_exit could also be
> > > added in the future.
> >
> > All vm_event monitor events are for vmx only right now. We can
> > certainly do that abstraction if and when someone decides to add svm
> > support, the ABI is versioned and no structure here is set in stone.
> > No guarantees are even implied for the structures to remain the same
> > in any way between one version of the ABI to the next. So with that I
> > don't see the need for complicating the structures at this time.
>
> Well, it's just altering the name slightly, but the structure layout
> would be the same.  Just so that someone wanting to add SVM support
> doesn't have to go and rename the VMX specific structures.  I think it
> also makes it easier to identify what's vendor specific and what
> should be shared between vendors.
>
> I don't think it adds any complications to the code you are adding,
> but would make it easier for someone wanting to add a new vendor
> support in the future.

I really don't think it's necessary at this point but oh well, I'm not
going to get hung up on that.

Tamas


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits
  2022-04-26 18:53         ` Tamas K Lengyel
@ 2022-04-27  9:22           ` Roger Pau Monné
  0 siblings, 0 replies; 19+ messages in thread
From: Roger Pau Monné @ 2022-04-27  9:22 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: Tamas K Lengyel, xen-devel, Wei Liu, Anthony PERARD,
	Juergen Gross, Andrew Cooper, George Dunlap, Jan Beulich,
	Julien Grall, Stefano Stabellini, Alexandru Isaila,
	Petre Pircalabu, Jun Nakajima, Kevin Tian

On Tue, Apr 26, 2022 at 02:53:54PM -0400, Tamas K Lengyel wrote:
> On Tue, Apr 26, 2022 at 4:50 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> >
> > On Mon, Apr 25, 2022 at 11:40:11AM -0400, Tamas K Lengyel wrote:
> > > On Mon, Apr 25, 2022 at 10:41 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> > > >
> > > > On Wed, Apr 13, 2022 at 09:41:52AM -0400, Tamas K Lengyel wrote:
> > > > > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> > > > > index 3079726a8b..30ca71432c 100644
> > > > > --- a/xen/arch/x86/monitor.c
> > > > > +++ b/xen/arch/x86/monitor.c
> > > > > @@ -332,6 +332,20 @@ int arch_monitor_domctl_event(struct domain *d,
> > > > >          break;
> > > > >      }
> > > > >
> > > > > +    case XEN_DOMCTL_MONITOR_EVENT_VMEXIT:
> > > > > +    {
> > > > > +        bool old_status = ad->monitor.vmexit_enabled;
> > > > > +
> > > > > +        if ( unlikely(old_status == requested_status) )
> > > > > +            return -EEXIST;
> > > >
> > > > What about if the requested status is the same as the current one, but
> > > > vmexit sync is not?
> > >
> > > You need to clear the currently registered event first, then register
> > > the new one.
> > >
> > > > IOW, I'm not sure this check is helpful, and you could likely avoid
> > > > the old_status local variable.
> > >
> > > It is helpful on the callee side. Usually the callee needs to keep
> > > track of the state of what events are enabled so that it can clean up
> > > after itself and if it ever runs into trying to set the event to
> > > something it's already set to then that indicates its state being
> > > out-of-sync.
> >
> > Hm, right.  I wonder if you should also check that the ring is empty
> > before changing the status?  So that the callee doesn't change the
> > status while requests are still pending on the ring from the previous
> > type?
> 
> No, that becomes tricky because really the only way to ensure the ring
> remains empty from the userspace is to pause the domain, which is very
> heavy handed. There is nothing wrong with asking Xen not to produce
> more of a certain type of request while still being able to handle the
> ones that are already on the ring. For setups where the two should
> happen at the same time is where the toolstack first pauses the
> domain, clears the ring, then disables the event. Both are valid
> approaches.

OK, so we rely on the callee to drain the ring properly when wanting
to change VMEXIT events.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2022-04-27  9:22 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-13 13:41 [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
2022-04-13 13:41 ` [PATCH v4 2/2] x86/monitor: Add new monitor event to catch all vmexits Tamas K Lengyel
2022-04-22 14:08   ` Tamas K Lengyel
2022-04-25 14:41   ` Roger Pau Monné
2022-04-25 15:40     ` Tamas K Lengyel
2022-04-26  8:49       ` Roger Pau Monné
2022-04-26 18:53         ` Tamas K Lengyel
2022-04-27  9:22           ` Roger Pau Monné
2022-04-22 14:07 ` [PATCH v4 1/2] x86/mem_sharing: make fork_reset more configurable Tamas K Lengyel
2022-04-25  7:49   ` Jan Beulich
2022-04-25 11:29     ` Tamas K Lengyel
2022-04-25 11:41       ` Jan Beulich
2022-04-25 12:52       ` George Dunlap
2022-04-25 13:26         ` Tamas K Lengyel
2022-04-25 14:11 ` Roger Pau Monné
2022-04-25 15:24   ` Tamas K Lengyel
2022-04-26  8:17     ` Roger Pau Monné
2022-04-26  8:33       ` Julien Grall
2022-04-26 10:45         ` Tamas K Lengyel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.