From: KarimAllah Ahmed <karahmed@amazon.de>
To: kvm@vger.kernel.org, x86@kernel.org
Cc: KarimAllah Ahmed <karahmed@amazon.de>
Subject: [RFC 1/2] KVM/nVMX: Cleanly exit from L2 to L1 on user-space exit
Date: Fri, 16 Feb 2018 15:23:49 +0100
Message-ID: <1518791030-31765-2-git-send-email-karahmed@amazon.de>
In-Reply-To: <1518791030-31765-1-git-send-email-karahmed@amazon.de>

On exit to L0 user-space, always exit from L2 to L1 and synchronize the
state properly for L1. This ensures that user-space only ever sees L1
state. It also allows L1 to be saved and resumed properly. Obviously,
horrible things will still happen to the L2 guest; this will be handled
in a separate patch.

There is only a single case that requires a bit of extra care: when the
decision to switch to user space happens while handling an L1
VMRESUME/VMLAUNCH (i.e. nested_run_pending is set). In order to handle
this as cleanly as possible without major restructuring, we simply do
not exit to user-space in this case and give L2 another chance to
actually run. We also request an immediate exit to ensure that an exit
to user space will still happen for the L2.
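
For reference, the immediate exit request is serviced just before VM
entry in vcpu_enter_guest() (the "if (req_immediate_exit)" line is
visible as context in the x86.c hunk below); at the time of this patch
it amounts to a self-IPI, roughly:

	if (req_immediate_exit) {
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		smp_send_reschedule(vcpu->cpu);
	}

The IPI forces a VM exit almost immediately after VM entry, so the
still-pending signal is noticed on the next iteration of vcpu_run(),
this time outside the nested_run_pending window.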

The only reason I can see for an exit to user space occurring while L2
is running is a pending signal; this is how user space preempts KVM_RUN
in order to save the state. L2 exits are either handled in the L0
kernel or reflected to L1, and are never handled in L0 user-space.
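
As a concrete example of that flow, a VMM's save path might look like
the (hypothetical, heavily simplified) sketch below; the signal choice,
thread layout, and helper names are assumptions for illustration, not
part of this patch:

	#include <errno.h>
	#include <pthread.h>
	#include <signal.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* No-op handler; delivery alone makes KVM_RUN return -EINTR. */
	static void sig_ipi(int sig) { }

	/* vCPU thread: run the guest until preempted by the signal. */
	static void *vcpu_thread(void *arg)
	{
		int vcpu_fd = *(int *)arg;
		struct kvm_regs regs;

		signal(SIGUSR1, sig_ipi);
		while (ioctl(vcpu_fd, KVM_RUN, 0) == 0)
			; /* dispatch on kvm_run->exit_reason here */
		if (errno == EINTR)
			/* With this patch, only L1 state is visible. */
			ioctl(vcpu_fd, KVM_GET_REGS, &regs);
		return NULL;
	}

	/* Control thread: preempt KVM_RUN to snapshot the vCPU. */
	static void request_save(pthread_t vcpu_tid)
	{
		pthread_kill(vcpu_tid, SIGUSR1);
	}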

Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c              | 39 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              | 33 ++++++++++++++++++++++++++++-----
 3 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 318a414..2c8be56 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -961,6 +961,8 @@ struct kvm_x86_ops {
 			      struct msr_bitmap_range *whitelist);
 
 	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
+	void (*prepare_exit_user)(struct kvm_vcpu *vcpu);
+	bool (*allow_exit_user)(struct kvm_vcpu *vcpu);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 52539be..22eb0dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2130,6 +2130,42 @@ static unsigned long segment_base(u16 selector)
 }
 #endif
 
+static bool vmx_allow_exit_user(struct kvm_vcpu *vcpu)
+{
+	return !to_vmx(vcpu)->nested.nested_run_pending;
+}
+
+static void vmx_prepare_exit_user(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vmx->nested.current_vmptr == -1ull)
+		return;
+
+	/*
+	 * If L2 is running, there is no need to update vmcs12 from the
+	 * shadow VMCS; just force an exit from L2 to L1.
+	 */
+	if (is_guest_mode(vcpu)) {
+		/*
+		 * Pretend that an external interrupt occurred while L2 is
+		 * running to cleanly exit into L1.
+		 */
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+
+		/* Switch from L2 MMU to L1 MMU */
+		kvm_mmu_reset_context(vcpu);
+	} else if (enable_shadow_vmcs) {
+		copy_shadow_to_vmcs12(vmx);
+	}
+
+	/* Flush VMCS12 to guest memory */
+	kvm_write_guest(vcpu->kvm, vmx->nested.current_vmptr,
+			get_vmcs12(vcpu), sizeof(*vmx->nested.cached_vmcs12));
+
+	return;
+}
+
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -12440,6 +12476,9 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 	.whitelist_msrs = vmx_whitelist_msrs,
 
+	.prepare_exit_user = vmx_prepare_exit_user,
+	.allow_exit_user = vmx_allow_exit_user,
+
 	.prepare_guest_switch = vmx_save_host_state,
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2cfbf39..8256a2d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -996,6 +996,12 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_rdpmc);
 
+static __always_inline bool should_exit_user(struct kvm_vcpu *vcpu)
+{
+	return signal_pending(current) && (kvm_x86_ops->allow_exit_user ?
+					   kvm_x86_ops->allow_exit_user(vcpu) : true);
+}
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -7187,8 +7193,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
 		kvm_x86_ops->sync_pir_to_irr(vcpu);
 
-	if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
-	    || need_resched() || signal_pending(current)) {
+	if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || need_resched()) {
 		vcpu->mode = OUTSIDE_GUEST_MODE;
 		smp_wmb();
 		local_irq_enable();
@@ -7198,6 +7203,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		goto cancel_injection;
 	}
 
+	if (signal_pending(current)) {
+		if (kvm_x86_ops->allow_exit_user &&
+		    kvm_x86_ops->allow_exit_user(vcpu)) {
+			vcpu->mode = OUTSIDE_GUEST_MODE;
+			smp_wmb();
+			local_irq_enable();
+			preempt_enable();
+			vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+			r = 1;
+			goto cancel_injection;
+		} else
+			req_immediate_exit = true;
+	}
+
 	kvm_load_guest_xcr0(vcpu);
 
 	if (req_immediate_exit) {
@@ -7364,7 +7383,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 
 		kvm_check_async_pf_completion(vcpu);
 
-		if (signal_pending(current)) {
+		if (should_exit_user(vcpu)) {
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.signal_exits;
@@ -7506,11 +7525,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	} else
 		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
-	if (kvm_run->immediate_exit)
+	if (kvm_run->immediate_exit) {
 		r = -EINTR;
-	else
+	} else {
 		r = vcpu_run(vcpu);
 
+		if (kvm_x86_ops->prepare_exit_user)
+			kvm_x86_ops->prepare_exit_user(vcpu);
+	}
+
 out:
 	kvm_put_guest_fpu(vcpu);
 	post_kvm_run_save(vcpu);
-- 
2.7.4
