All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nadav Har'El" <nyh@il.ibm.com>
To: kvm@vger.kernel.org
Cc: gleb@redhat.com, avi@redhat.com
Subject: [PATCH 07/29] nVMX: Hold a vmcs02 for each vmcs12
Date: Thu, 27 Jan 2011 10:33:26 +0200	[thread overview]
Message-ID: <201101270833.p0R8XQ4w002480@rice.haifa.ibm.com> (raw)
In-Reply-To: 1296116987-nyh@il.ibm.com

In this patch we add a list of L0 (hardware) VMCSs, which we'll use to hold a 
hardware VMCS for each active vmcs12 (i.e., for each L2 guest).

We call each of these L0 VMCSs a "vmcs02", as it is the VMCS that L0 uses
to run its nested guest L2.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
---
 arch/x86/kvm/vmx.c |  142 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

--- .before/arch/x86/kvm/vmx.c	2011-01-26 18:06:03.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2011-01-26 18:06:03.000000000 +0200
@@ -117,6 +117,7 @@ static int ple_window = KVM_VMX_DEFAULT_
 module_param(ple_window, int, S_IRUGO);
 
 #define NR_AUTOLOAD_MSRS 1
+#define NESTED_MAX_VMCS 256
 
 struct vmcs {
 	u32 revision_id;
@@ -159,6 +160,34 @@ struct __packed vmcs12 {
 #define VMCS12_REVISION 0x11e57ed0
 
 /*
+ * When we temporarily switch a vcpu's VMCS (e.g., stop using an L1's VMCS
+ * while we use L2's VMCS), and wish to save the previous VMCS, we must also
+ * remember on which CPU it was last loaded (vcpu->cpu), so when we return to
+ * using this VMCS we'll know if we're now running on a different CPU and need
+ * to clear the VMCS on the old CPU, and load it on the new one. Additionally,
+ * we need to remember whether this VMCS was launched (vmx->launched), so when
+ * we return to it we know if to VMLAUNCH or to VMRESUME it (we cannot deduce
+ * this from other state, because it's possible that this VMCS had once been
+ * launched, but has since been cleared after a CPU switch, and now
+ * vmx->launch is 0.
+ */
+struct saved_vmcs {
+	struct vmcs *vmcs;
+	int cpu;
+	int launched;
+};
+
+/*
+ * A cache keeping a VMCS (vmcs02) for each loaded vmcs12. In addition to the
+ * VMCS, we need information on its state - see struct saved_vmcs above.
+ */
+struct vmcs_list {
+	struct list_head list;
+	gpa_t vmcs12_addr;
+	struct saved_vmcs vmcs02;
+};
+
+/*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example,
  * the current VMCS set by L1, a list of the VMCSs used to run the active
@@ -173,6 +202,10 @@ struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+
+	/* list of real (hardware) VMCS, one for each L2 guest of L1 */
+	struct list_head vmcs02_list; /* a vmcs_list */
+	int vmcs02_num;
 };
 
 struct vcpu_vmx {
@@ -3964,6 +3997,110 @@ static int handle_invalid_op(struct kvm_
 	return 1;
 }
 
+/* Find a vmcs02 saved for the current L2's vmcs12 */
+static struct saved_vmcs *nested_get_current_vmcs(struct vcpu_vmx *vmx)
+{
+	struct vmcs_list *item;
+	list_for_each_entry(item, &vmx->nested.vmcs02_list, list)
+		if (item->vmcs12_addr == vmx->nested.current_vmptr)
+			return &item->vmcs02;
+	return NULL;
+}
+
+/*
+ * Allocate an L0 VMCS (vmcs02) for the current L1 VMCS (vmcs12), if one
+ * does not already exist. The allocation is done in L0 memory, so to avoid
+ * denial-of-service attack by guests, we limit the number of concurrently-
+ * allocated vmcss. A well-behaving L1 will VMCLEAR unused vmcs12s and not
+ * trigger this limit.
+ */
+static int nested_create_current_vmcs(struct kvm_vcpu *vcpu)
+{
+	struct vmcs_list *new_l2_guest;
+	struct vmcs *vmcs02;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (nested_get_current_vmcs(vmx))
+		return 0; /* nothing to do - we already have a VMCS */
+
+	if (vmx->nested.vmcs02_num >= NESTED_MAX_VMCS)
+		return -ENOMEM;
+
+	new_l2_guest = (struct vmcs_list *)
+		kmalloc(sizeof(struct vmcs_list), GFP_KERNEL);
+	if (!new_l2_guest)
+		return -ENOMEM;
+
+	vmcs02 = alloc_vmcs();
+	if (!vmcs02) {
+		kfree(new_l2_guest);
+		return -ENOMEM;
+	}
+
+	new_l2_guest->vmcs12_addr = vmx->nested.current_vmptr;
+	new_l2_guest->vmcs02.vmcs = vmcs02;
+	new_l2_guest->vmcs02.cpu = -1;
+	new_l2_guest->vmcs02.launched = 0;
+	list_add(&(new_l2_guest->list), &(vmx->nested.vmcs02_list));
+	vmx->nested.vmcs02_num++;
+	return 0;
+}
+
+static void __nested_free_saved_vmcs(void *arg)
+{
+	struct saved_vmcs *saved_vmcs = arg;
+	int cpu = raw_smp_processor_id();
+
+	if (saved_vmcs->cpu == cpu) /* TODO: how can this not be the case? */
+		vmcs_clear(saved_vmcs->vmcs);
+	if (per_cpu(current_vmcs, cpu) == saved_vmcs->vmcs)
+		per_cpu(current_vmcs, cpu) = NULL;
+}
+
+/*
+ * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
+ * (the necessary information is in the saved_vmcs structure).
+ * See also vcpu_clear() (with different parameters and side-effects)
+ */
+static void nested_free_saved_vmcs(struct vcpu_vmx *vmx,
+		struct saved_vmcs *saved_vmcs)
+{
+	if (saved_vmcs->cpu != -1)
+		smp_call_function_single(saved_vmcs->cpu,
+				__nested_free_saved_vmcs, saved_vmcs, 1);
+
+	free_vmcs(saved_vmcs->vmcs);
+}
+
+/*
+ * Free a vmcs12's associated vmcs02 (if there is one), and remove it from
+ * vmcs02_list.
+ */
+static void nested_free_vmcs(struct vcpu_vmx *vmx, gpa_t vmptr)
+{
+	struct vmcs_list *item;
+	list_for_each_entry(item, &vmx->nested.vmcs02_list, list)
+		if (item->vmcs12_addr == vmptr) {
+			nested_free_saved_vmcs(vmx, &item->vmcs02);
+			list_del(&item->list);
+			kfree(item);
+			vmx->nested.vmcs02_num--;
+			return;
+		}
+}
+
+/* Free all vmcs02 saved for this L1 vcpu */
+static void nested_free_all_vmcs(struct vcpu_vmx *vmx)
+{
+	struct vmcs_list *item, *n;
+	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_list, list) {
+		nested_free_saved_vmcs(vmx, &item->vmcs02);
+		list_del(&item->list);
+		kfree(item);
+	}
+	vmx->nested.vmcs02_num = 0;
+}
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
@@ -4000,6 +4137,9 @@ static int handle_vmon(struct kvm_vcpu *
 		return 1;
 	}
 
+	INIT_LIST_HEAD(&(vmx->nested.vmcs02_list));
+	vmx->nested.vmcs02_num = 0;
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -4050,6 +4190,8 @@ static void free_nested(struct vcpu_vmx 
 		nested_release_page(vmx->nested.current_vmcs12_page);
 		vmx->nested.current_vmptr = -1ull;
 	}
+
+	nested_free_all_vmcs(vmx);
 }
 
 /* Emulate the VMXOFF instruction */

  parent reply	other threads:[~2011-01-27  8:33 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-01-27  8:29 [PATCH 0/29] nVMX: Nested VMX, v8 Nadav Har'El
2011-01-27  8:30 ` [PATCH 01/29] nVMX: Add "nested" module option to vmx.c Nadav Har'El
2011-01-27  8:30 ` [PATCH 02/29] nVMX: Implement VMXON and VMXOFF Nadav Har'El
2011-01-27  8:31 ` [PATCH 03/29] nVMX: Allow setting the VMXE bit in CR4 Nadav Har'El
2011-01-27  8:31 ` [PATCH 04/29] nVMX: Introduce vmcs12: a VMCS structure for L1 Nadav Har'El
2011-01-27  8:32 ` [PATCH 05/29] nVMX: Implement reading and writing of VMX MSRs Nadav Har'El
2011-01-30  9:52   ` Avi Kivity
2011-01-31  8:57     ` Nadav Har'El
2011-01-31  9:01       ` Avi Kivity
2011-01-27  8:32 ` [PATCH 06/29] nVMX: Decoding memory operands of VMX instructions Nadav Har'El
2011-01-27  8:33 ` Nadav Har'El [this message]
2011-01-30 10:02   ` [PATCH 07/29] nVMX: Hold a vmcs02 for each vmcs12 Avi Kivity
2011-01-31  9:26     ` Nadav Har'El
2011-01-31  9:41       ` Avi Kivity
2011-02-03 12:57     ` Nadav Har'El
2011-02-06  9:16       ` Avi Kivity
2011-02-13 13:04         ` Nadav Har'El
2011-02-13 14:58           ` Avi Kivity
2011-02-13 20:07             ` Nadav Har'El
2011-01-27  8:33 ` [PATCH 08/29] nVMX: Fix local_vcpus_link handling Nadav Har'El
2011-01-30 10:08   ` Avi Kivity
2011-01-27  8:34 ` [PATCH 09/29] nVMX: Add VMCS fields to the vmcs12 Nadav Har'El
2011-01-30 10:10   ` Avi Kivity
2011-01-27  8:34 ` [PATCH 10/29] nVMX: Success/failure of VMX instructions Nadav Har'El
2011-01-27  8:35 ` [PATCH 11/29] nVMX: Implement VMCLEAR Nadav Har'El
2011-01-30 12:07   ` Avi Kivity
2011-01-27  8:35 ` [PATCH 12/29] nVMX: Implement VMPTRLD Nadav Har'El
2011-01-27  8:36 ` [PATCH 13/29] nVMX: Implement VMPTRST Nadav Har'El
2011-01-27  8:37 ` [PATCH 14/29] nVMX: Implement VMREAD and VMWRITE Nadav Har'El
2011-01-27  8:37 ` [PATCH 15/29] nVMX: Prepare vmcs02 from vmcs01 and vmcs12 Nadav Har'El
2011-01-27  8:38 ` [PATCH 16/29] nVMX: Move register-syncing to a function Nadav Har'El
2011-01-27  8:38 ` [PATCH 17/29] nVMX: Implement VMLAUNCH and VMRESUME Nadav Har'El
2011-01-27  8:39 ` [PATCH 18/29] nVMX: No need for handle_vmx_insn function any more Nadav Har'El
2011-01-27  8:39 ` [PATCH 19/29] nVMX: Exiting from L2 to L1 Nadav Har'El
2011-01-27  8:40 ` [PATCH 20/29] nVMX: Deciding if L0 or L1 should handle an L2 exit Nadav Har'El
2011-01-27  8:40 ` [PATCH 21/29] nVMX: Correct handling of interrupt injection Nadav Har'El
2011-01-27  8:41 ` [PATCH 22/29] nVMX: Correct handling of exception injection Nadav Har'El
2011-01-27  8:41 ` [PATCH 23/29] nVMX: Correct handling of idt vectoring info Nadav Har'El
2011-01-27  8:42 ` [PATCH 24/29] nVMX: Handling of CR0 and CR4 modifying instructions Nadav Har'El
2011-01-27  8:42 ` [PATCH 25/29] nVMX: Further fixes for lazy FPU loading Nadav Har'El
2011-01-27  8:43 ` [PATCH 26/29] nVMX: Additional TSC-offset handling Nadav Har'El
2011-01-27  8:43 ` [PATCH 27/29] nVMX: Add VMX to list of supported cpuid features Nadav Har'El
2011-01-27  8:44 ` [PATCH 28/29] nVMX: Miscellenous small corrections Nadav Har'El
2011-01-27  8:44 ` [PATCH 29/29] nVMX: Documentation Nadav Har'El
2011-01-28  8:41 ` [PATCH 0/29] nVMX: Nested VMX, v8 Juerg Haefliger
2011-01-28 17:16   ` Nadav Har'El
2011-01-31 10:07   ` Nadav Har'El

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201101270833.p0R8XQ4w002480@rice.haifa.ibm.com \
    --to=nyh@il.ibm.com \
    --cc=avi@redhat.com \
    --cc=gleb@redhat.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.