All of lore.kernel.org
 help / color / mirror / Atom feed
From: Razvan Cojocaru <rcojocaru@bitdefender.com>
To: xen-devel@lists.xen.org
Cc: andrew.cooper3@citrix.com, tamas@tklengyel.com,
	Razvan Cojocaru <rcojocaru@bitdefender.com>,
	jbeulich@suse.com
Subject: [PATCH V2 2/2] x86/vm_event: fix race between __context_switch() and vm_event_resume()
Date: Wed,  3 May 2017 12:10:03 +0300	[thread overview]
Message-ID: <1493802603-4978-3-git-send-email-rcojocaru@bitdefender.com> (raw)
In-Reply-To: <1493802603-4978-1-git-send-email-rcojocaru@bitdefender.com>

The introspection agent can reply to a vm_event faster than
vmx_vmexit_handler() can complete in some cases, where it is then
not safe for vm_event_set_registers() to modify v->arch.user_regs.
In the test scenario, we were stepping over an INT3 breakpoint by
setting RIP += 1. The quick reply tended to complete before the VCPU
triggering the introspection event had properly paused and been
descheduled. If the reply occurs before __context_switch() happens,
__context_switch() clobbers the reply by overwriting
v->arch.user_regs from the stack. If the reply occurs after
__context_switch(), we don't pass through __context_switch() when
transitioning to idle.

This patch ensures that vm_event_resume() code only sets per-VCPU
data to be used for the actual setting of registers later in
hvm_do_resume() (similar to the model used to control setting of CRs
and MSRs).

The patch additionally removes the sync_vcpu_execstate(v) call from
vm_event_resume(), which is no longer necessary, which removes the
associated broadcast TLB flush (read: performance improvement).

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
 xen/arch/x86/hvm/vm_event.c    | 35 +++++++++++++++++++++++++++++++++++
 xen/arch/x86/vm_event.c        | 22 ++--------------------
 xen/common/vm_event.c          | 17 ++++++++++-------
 xen/include/asm-x86/vm_event.h |  2 ++
 4 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/hvm/vm_event.c b/xen/arch/x86/hvm/vm_event.c
index b35ee12..bfb95a2 100644
--- a/xen/arch/x86/hvm/vm_event.c
+++ b/xen/arch/x86/hvm/vm_event.c
@@ -23,6 +23,39 @@
 #include <asm/hvm/support.h>
 #include <asm/vm_event.h>
 
+static void hvm_vm_event_set_registers(const struct vcpu *v)
+{
+    ASSERT(v == current);
+
+    if ( unlikely(v->arch.vm_event->set_gprs) )
+    {
+        struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+        regs->rax = v->arch.vm_event->gprs.rax;
+        regs->rbx = v->arch.vm_event->gprs.rbx;
+        regs->rcx = v->arch.vm_event->gprs.rcx;
+        regs->rdx = v->arch.vm_event->gprs.rdx;
+        regs->rsp = v->arch.vm_event->gprs.rsp;
+        regs->rbp = v->arch.vm_event->gprs.rbp;
+        regs->rsi = v->arch.vm_event->gprs.rsi;
+        regs->rdi = v->arch.vm_event->gprs.rdi;
+
+        regs->r8 = v->arch.vm_event->gprs.r8;
+        regs->r9 = v->arch.vm_event->gprs.r9;
+        regs->r10 = v->arch.vm_event->gprs.r10;
+        regs->r11 = v->arch.vm_event->gprs.r11;
+        regs->r12 = v->arch.vm_event->gprs.r12;
+        regs->r13 = v->arch.vm_event->gprs.r13;
+        regs->r14 = v->arch.vm_event->gprs.r14;
+        regs->r15 = v->arch.vm_event->gprs.r15;
+
+        regs->rflags = v->arch.vm_event->gprs.rflags;
+        regs->rip = v->arch.vm_event->gprs.rip;
+
+        v->arch.vm_event->set_gprs = false;
+    }
+}
+
 void hvm_vm_event_do_resume(struct vcpu *v)
 {
     struct monitor_write_data *w;
@@ -30,6 +63,8 @@ void hvm_vm_event_do_resume(struct vcpu *v)
     if ( likely(!v->arch.vm_event) )
         return;
 
+    hvm_vm_event_set_registers(v);
+
     w = &v->arch.vm_event->write_data;
 
     if ( unlikely(v->arch.vm_event->emulate_flags) )
diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
index 502ccc2..a6ea42c 100644
--- a/xen/arch/x86/vm_event.c
+++ b/xen/arch/x86/vm_event.c
@@ -113,26 +113,8 @@ void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp)
 {
     ASSERT(atomic_read(&v->vm_event_pause_count));
 
-    v->arch.user_regs.rax = rsp->data.regs.x86.rax;
-    v->arch.user_regs.rbx = rsp->data.regs.x86.rbx;
-    v->arch.user_regs.rcx = rsp->data.regs.x86.rcx;
-    v->arch.user_regs.rdx = rsp->data.regs.x86.rdx;
-    v->arch.user_regs.rsp = rsp->data.regs.x86.rsp;
-    v->arch.user_regs.rbp = rsp->data.regs.x86.rbp;
-    v->arch.user_regs.rsi = rsp->data.regs.x86.rsi;
-    v->arch.user_regs.rdi = rsp->data.regs.x86.rdi;
-
-    v->arch.user_regs.r8 = rsp->data.regs.x86.r8;
-    v->arch.user_regs.r9 = rsp->data.regs.x86.r9;
-    v->arch.user_regs.r10 = rsp->data.regs.x86.r10;
-    v->arch.user_regs.r11 = rsp->data.regs.x86.r11;
-    v->arch.user_regs.r12 = rsp->data.regs.x86.r12;
-    v->arch.user_regs.r13 = rsp->data.regs.x86.r13;
-    v->arch.user_regs.r14 = rsp->data.regs.x86.r14;
-    v->arch.user_regs.r15 = rsp->data.regs.x86.r15;
-
-    v->arch.user_regs.rflags = rsp->data.regs.x86.rflags;
-    v->arch.user_regs.rip = rsp->data.regs.x86.rip;
+    v->arch.vm_event->gprs = rsp->data.regs.x86;
+    v->arch.vm_event->set_gprs = true;
 }
 
 void vm_event_monitor_next_interrupt(struct vcpu *v)
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 0fe9a53..9291db6 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -357,6 +357,16 @@ void vm_event_resume(struct domain *d, struct vm_event_domain *ved)
 {
     vm_event_response_t rsp;
 
+    /*
+     * vm_event_resume() runs in either XEN_DOMCTL_VM_EVENT_OP_*, or
+     * EVTCHN_send context from the introspection consumer. Both contexts
+     * are guaranteed not to be the subject of vm_event responses.
+     * While we could ASSERT(v != current) for each VCPU in d in the loop
+     * below, this covers the case where we would need to iterate over all
+     * of them more succintly.
+     */
+    ASSERT(d != current->domain);
+
     /* Pull all responses off the ring. */
     while ( vm_event_get_response(d, ved, &rsp) )
     {
@@ -375,13 +385,6 @@ void vm_event_resume(struct domain *d, struct vm_event_domain *ved)
         v = d->vcpu[rsp.vcpu_id];
 
         /*
-         * Make sure the vCPU state has been synchronized for the custom
-         * handlers.
-         */
-        if ( atomic_read(&v->vm_event_pause_count) )
-            sync_vcpu_execstate(v);
-
-        /*
          * In some cases the response type needs extra handling, so here
          * we call the appropriate handlers.
          */
diff --git a/xen/include/asm-x86/vm_event.h b/xen/include/asm-x86/vm_event.h
index ca73f99..39e73c8 100644
--- a/xen/include/asm-x86/vm_event.h
+++ b/xen/include/asm-x86/vm_event.h
@@ -32,6 +32,8 @@ struct arch_vm_event {
         struct vm_event_emul_insn_data insn;
     } emul;
     struct monitor_write_data write_data;
+    struct vm_event_regs_x86 gprs;
+    bool set_gprs;
 };
 
 int vm_event_init_domain(struct domain *d);
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2017-05-03  9:10 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-03  9:10 [PATCH V2 0/2] Fix vm_event resume path race condition Razvan Cojocaru
2017-05-03  9:10 ` [PATCH V2 1/2] x86/vm_event: added hvm/vm_event.{h,c} Razvan Cojocaru
2017-05-03  9:51   ` [PATCH V2 1/2] x86/vm_event: added hvm/vm_event.{h, c} Jan Beulich
2017-05-03 10:37     ` Razvan Cojocaru
2017-05-03 10:48       ` Jan Beulich
2017-05-03 20:05         ` Tamas K Lengyel
2017-05-03 20:16           ` Razvan Cojocaru
2017-05-03 20:32             ` Tamas K Lengyel
2017-05-03 20:38               ` Razvan Cojocaru
2017-05-03  9:10 ` Razvan Cojocaru [this message]
2017-05-03 10:01   ` [PATCH V2 2/2] x86/vm_event: fix race between __context_switch() and vm_event_resume() Jan Beulich
2017-05-03 10:40     ` Razvan Cojocaru
2017-05-03 20:10   ` Tamas K Lengyel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1493802603-4978-3-git-send-email-rcojocaru@bitdefender.com \
    --to=rcojocaru@bitdefender.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=tamas@tklengyel.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.