linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] KVM fixes for 2.6.20
@ 2007-01-23 14:30 Avi Kivity
  2007-01-23 14:32 ` [PATCH 1/5] KVM: SVM: Fix SVM idt confusion Avi Kivity
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Avi Kivity @ 2007-01-23 14:30 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, Andrew Morton, Ingo Molnar

The following series of patches fixes several kvm problems.  I believe 
they are important and well-tested enough to be included in 2.6.20, 
especially the host reboot fix on AMD machines.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/5] KVM: SVM: Fix SVM idt confusion
  2007-01-23 14:30 [PATCH 0/5] KVM fixes for 2.6.20 Avi Kivity
@ 2007-01-23 14:32 ` Avi Kivity
  2007-01-23 14:33 ` [PATCH 2/5] KVM: Emulate IA32_MISC_ENABLE msr Avi Kivity
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2007-01-23 14:32 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, akpm, mingo

From: Leonard Norrgard <leonard.norrgard@refactor.fi>

There's an obvious typo in svm_{get,set}_idt, causing it to access the ldt
instead.

Because these functions are only called for save/load on AMD, the bug does not
impact normal operation.  With the fix, save/load works as expected on AMD
hosts.

Signed-off-by: Uri Lublin <uril@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>

Index: linux-2.6/drivers/kvm/svm.c
===================================================================
--- linux-2.6.orig/drivers/kvm/svm.c
+++ linux-2.6/drivers/kvm/svm.c
@@ -680,14 +680,14 @@ static void svm_get_cs_db_l_bits(struct 
 
 static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	dt->limit = vcpu->svm->vmcb->save.ldtr.limit;
-	dt->base = vcpu->svm->vmcb->save.ldtr.base;
+	dt->limit = vcpu->svm->vmcb->save.idtr.limit;
+	dt->base = vcpu->svm->vmcb->save.idtr.base;
 }
 
 static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	vcpu->svm->vmcb->save.ldtr.limit = dt->limit;
-	vcpu->svm->vmcb->save.ldtr.base = dt->base ;
+	vcpu->svm->vmcb->save.idtr.limit = dt->limit;
+	vcpu->svm->vmcb->save.idtr.base = dt->base ;
 }
 
 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/5] KVM: Emulate IA32_MISC_ENABLE msr
  2007-01-23 14:30 [PATCH 0/5] KVM fixes for 2.6.20 Avi Kivity
  2007-01-23 14:32 ` [PATCH 1/5] KVM: SVM: Fix SVM idt confusion Avi Kivity
@ 2007-01-23 14:33 ` Avi Kivity
  2007-01-23 14:34 ` [PATCH 3/5] KVM: MMU: Perform access checks in walk_addr() Avi Kivity
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2007-01-23 14:33 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, akpm, mingo

This allows netbsd 3.1 i386 to get further along installing.

Signed-off-by: Avi Kivity <avi@qumranet.com>

Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -1226,6 +1226,9 @@ int kvm_get_msr_common(struct kvm_vcpu *
 	case MSR_IA32_APICBASE:
 		data = vcpu->apic_base;
 		break;
+	case MSR_IA32_MISC_ENABLE:
+		data = vcpu->ia32_misc_enable_msr;
+		break;
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
 		data = vcpu->shadow_efer;
@@ -1297,6 +1300,9 @@ int kvm_set_msr_common(struct kvm_vcpu *
 	case MSR_IA32_APICBASE:
 		vcpu->apic_base = data;
 		break;
+	case MSR_IA32_MISC_ENABLE:
+		vcpu->ia32_misc_enable_msr = data;
+		break;
 	default:
 		printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
 		return 1;
@@ -1600,6 +1606,10 @@ static u32 msrs_to_save[] = {
 
 static unsigned num_msrs_to_save;
 
+static u32 emulated_msrs[] = {
+	MSR_IA32_MISC_ENABLE,
+};
+
 static __init void kvm_init_msr_list(void)
 {
 	u32 dummy[2];
@@ -1925,7 +1935,7 @@ static long kvm_dev_ioctl(struct file *f
 		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
 			goto out;
 		n = msr_list.nmsrs;
-		msr_list.nmsrs = num_msrs_to_save;
+		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
 		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
 			goto out;
 		r = -E2BIG;
@@ -1935,6 +1945,11 @@ static long kvm_dev_ioctl(struct file *f
 		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
 				 num_msrs_to_save * sizeof(u32)))
 			goto out;
+		if (copy_to_user(user_msr_list->indices
+				 + num_msrs_to_save * sizeof(u32),
+				 &emulated_msrs,
+				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
+			goto out;
 		r = 0;
 		break;
 	}
Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -242,6 +242,7 @@ struct kvm_vcpu {
 	u64 pdptrs[4]; /* pae */
 	u64 shadow_efer;
 	u64 apic_base;
+	u64 ia32_misc_enable_msr;
 	int nmsrs;
 	struct vmx_msr_entry *guest_msrs;
 	struct vmx_msr_entry *host_msrs;

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 3/5] KVM: MMU: Perform access checks in walk_addr()
  2007-01-23 14:30 [PATCH 0/5] KVM fixes for 2.6.20 Avi Kivity
  2007-01-23 14:32 ` [PATCH 1/5] KVM: SVM: Fix SVM idt confusion Avi Kivity
  2007-01-23 14:33 ` [PATCH 2/5] KVM: Emulate IA32_MISC_ENABLE msr Avi Kivity
@ 2007-01-23 14:34 ` Avi Kivity
  2007-01-23 14:35 ` [PATCH 4/5] KVM: MMU: Report nx faults to the guest Avi Kivity
  2007-01-23 14:36 ` [PATCH 5/5] KVM: SVM: Propagate cpu shutdown events to userspace Avi Kivity
  4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2007-01-23 14:34 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, akpm, mingo

Check pte permission bits in walk_addr(), instead of scattering the checks all
over the code.  This has the following benefits:

1. We no longer set the accessed bit for accesses which fail permission checks.
2. Setting the accessed bit is simplified.
3. Under some circumstances, we used to pretend a page fault was fixed when
   it would actually fail the access checks.  This caused an unnecessary
   vmexit.
4. The error code for guest page faults is now correct.

The fix helps netbsd further along booting, and allows kvm to pass the new mmu
testsuite.

Signed-off-by: Avi Kivity <avi@qumranet.com>

Index: linux-2.6/drivers/kvm/paging_tmpl.h
===================================================================
--- linux-2.6.orig/drivers/kvm/paging_tmpl.h
+++ linux-2.6/drivers/kvm/paging_tmpl.h
@@ -63,13 +63,15 @@ struct guest_walker {
 	pt_element_t *ptep;
 	pt_element_t inherited_ar;
 	gfn_t gfn;
+	u32 error_code;
 };
 
 /*
  * Fetch a guest pte for a guest virtual address
  */
-static void FNAME(walk_addr)(struct guest_walker *walker,
-			     struct kvm_vcpu *vcpu, gva_t addr)
+static int FNAME(walk_addr)(struct guest_walker *walker,
+			    struct kvm_vcpu *vcpu, gva_t addr,
+			    int write_fault, int user_fault)
 {
 	hpa_t hpa;
 	struct kvm_memory_slot *slot;
@@ -86,7 +88,7 @@ static void FNAME(walk_addr)(struct gues
 		walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
 		root = *walker->ptep;
 		if (!(root & PT_PRESENT_MASK))
-			return;
+			goto not_present;
 		--walker->level;
 	}
 #endif
@@ -111,11 +113,18 @@ static void FNAME(walk_addr)(struct gues
 		ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
 		       ((unsigned long)ptep & PAGE_MASK));
 
-		if (is_present_pte(*ptep) && !(*ptep &  PT_ACCESSED_MASK))
-			*ptep |= PT_ACCESSED_MASK;
-
 		if (!is_present_pte(*ptep))
-			break;
+			goto not_present;
+
+		if (write_fault && !is_writeble_pte(*ptep))
+			if (user_fault || is_write_protection(vcpu))
+				goto access_error;
+
+		if (user_fault && !(*ptep & PT_USER_MASK))
+			goto access_error;
+
+		if (!(*ptep & PT_ACCESSED_MASK))
+			*ptep |= PT_ACCESSED_MASK; 	/* avoid rmw */
 
 		if (walker->level == PT_PAGE_TABLE_LEVEL) {
 			walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
@@ -146,6 +155,21 @@ static void FNAME(walk_addr)(struct gues
 	}
 	walker->ptep = ptep;
 	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
+	return 1;
+
+not_present:
+	walker->error_code = 0;
+	goto err;
+
+access_error:
+	walker->error_code = PFERR_PRESENT_MASK;
+
+err:
+	if (write_fault)
+		walker->error_code |= PFERR_WRITE_MASK;
+	if (user_fault)
+		walker->error_code |= PFERR_USER_MASK;
+	return 0;
 }
 
 static void FNAME(release_walker)(struct guest_walker *walker)
@@ -347,7 +371,6 @@ static int FNAME(page_fault)(struct kvm_
 			       u32 error_code)
 {
 	int write_fault = error_code & PFERR_WRITE_MASK;
-	int pte_present = error_code & PFERR_PRESENT_MASK;
 	int user_fault = error_code & PFERR_USER_MASK;
 	struct guest_walker walker;
 	u64 *shadow_pte;
@@ -365,19 +388,19 @@ static int FNAME(page_fault)(struct kvm_
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
-	FNAME(walk_addr)(&walker, vcpu, addr);
-	shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
+	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault);
 
 	/*
 	 * The page is not mapped by the guest.  Let the guest handle it.
 	 */
-	if (!shadow_pte) {
-		pgprintk("%s: not mapped\n", __FUNCTION__);
-		inject_page_fault(vcpu, addr, error_code);
+	if (!r) {
+		pgprintk("%s: guest page fault\n", __FUNCTION__);
+		inject_page_fault(vcpu, addr, walker.error_code);
 		FNAME(release_walker)(&walker);
 		return 0;
 	}
 
+	shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
 	pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
 		 shadow_pte, *shadow_pte);
 
@@ -399,22 +422,7 @@ static int FNAME(page_fault)(struct kvm_
 	 * mmio: emulate if accessible, otherwise its a guest fault.
 	 */
 	if (is_io_pte(*shadow_pte)) {
-		if (may_access(*shadow_pte, write_fault, user_fault))
-			return 1;
-		pgprintk("%s: io work, no access\n", __FUNCTION__);
-		inject_page_fault(vcpu, addr,
-				  error_code | PFERR_PRESENT_MASK);
-		kvm_mmu_audit(vcpu, "post page fault (io)");
-		return 0;
-	}
-
-	/*
-	 * pte not present, guest page fault.
-	 */
-	if (pte_present && !fixed && !write_pt) {
-		inject_page_fault(vcpu, addr, error_code);
-		kvm_mmu_audit(vcpu, "post page fault (guest)");
-		return 0;
+		return 1;
 	}
 
 	++kvm_stat.pf_fixed;
@@ -429,7 +437,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kv
 	pt_element_t guest_pte;
 	gpa_t gpa;
 
-	FNAME(walk_addr)(&walker, vcpu, vaddr);
+	FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0);
 	guest_pte = *walker.ptep;
 	FNAME(release_walker)(&walker);
 
Index: linux-2.6/drivers/kvm/mmu.c
===================================================================
--- linux-2.6.orig/drivers/kvm/mmu.c
+++ linux-2.6/drivers/kvm/mmu.c
@@ -992,16 +992,6 @@ static inline int fix_read_pf(u64 *shado
 	return 0;
 }
 
-static int may_access(u64 pte, int write, int user)
-{
-
-	if (user && !(pte & PT_USER_MASK))
-		return 0;
-	if (write && !(pte & PT_WRITABLE_MASK))
-		return 0;
-	return 1;
-}
-
 static void paging_free(struct kvm_vcpu *vcpu)
 {
 	nonpaging_free(vcpu);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 4/5] KVM: MMU: Report nx faults to the guest
  2007-01-23 14:30 [PATCH 0/5] KVM fixes for 2.6.20 Avi Kivity
                   ` (2 preceding siblings ...)
  2007-01-23 14:34 ` [PATCH 3/5] KVM: MMU: Perform access checks in walk_addr() Avi Kivity
@ 2007-01-23 14:35 ` Avi Kivity
  2007-01-23 14:36 ` [PATCH 5/5] KVM: SVM: Propagate cpu shutdown events to userspace Avi Kivity
  4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2007-01-23 14:35 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, akpm, mingo

With the recent guest page fault change, we perform access checks on our own
instead of relying on the cpu.  This means we have to perform the nx checks as
well.

Software like the google toolbar on windows appears to rely on this somehow.

Signed-off-by: Avi Kivity <avi@qumranet.com>

Index: linux-2.6/drivers/kvm/mmu.c
===================================================================
--- linux-2.6.orig/drivers/kvm/mmu.c
+++ linux-2.6/drivers/kvm/mmu.c
@@ -143,6 +143,7 @@ static int dbg = 1;
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_FETCH_MASK (1U << 4)
 
 #define PT64_ROOT_LEVEL 4
 #define PT32_ROOT_LEVEL 2
@@ -168,6 +169,11 @@ static int is_cpuid_PSE36(void)
 	return 1;
 }
 
+static int is_nx(struct kvm_vcpu *vcpu)
+{
+	return vcpu->shadow_efer & EFER_NX;
+}
+
 static int is_present_pte(unsigned long pte)
 {
 	return pte & PT_PRESENT_MASK;
Index: linux-2.6/drivers/kvm/paging_tmpl.h
===================================================================
--- linux-2.6.orig/drivers/kvm/paging_tmpl.h
+++ linux-2.6/drivers/kvm/paging_tmpl.h
@@ -71,7 +71,7 @@ struct guest_walker {
  */
 static int FNAME(walk_addr)(struct guest_walker *walker,
 			    struct kvm_vcpu *vcpu, gva_t addr,
-			    int write_fault, int user_fault)
+			    int write_fault, int user_fault, int fetch_fault)
 {
 	hpa_t hpa;
 	struct kvm_memory_slot *slot;
@@ -123,6 +123,11 @@ static int FNAME(walk_addr)(struct guest
 		if (user_fault && !(*ptep & PT_USER_MASK))
 			goto access_error;
 
+#if PTTYPE == 64
+		if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
+			goto access_error;
+#endif
+
 		if (!(*ptep & PT_ACCESSED_MASK))
 			*ptep |= PT_ACCESSED_MASK; 	/* avoid rmw */
 
@@ -169,6 +174,8 @@ err:
 		walker->error_code |= PFERR_WRITE_MASK;
 	if (user_fault)
 		walker->error_code |= PFERR_USER_MASK;
+	if (fetch_fault)
+		walker->error_code |= PFERR_FETCH_MASK;
 	return 0;
 }
 
@@ -372,6 +379,7 @@ static int FNAME(page_fault)(struct kvm_
 {
 	int write_fault = error_code & PFERR_WRITE_MASK;
 	int user_fault = error_code & PFERR_USER_MASK;
+	int fetch_fault = error_code & PFERR_FETCH_MASK;
 	struct guest_walker walker;
 	u64 *shadow_pte;
 	int fixed;
@@ -388,7 +396,8 @@ static int FNAME(page_fault)(struct kvm_
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
-	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault);
+	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
+			     fetch_fault);
 
 	/*
 	 * The page is not mapped by the guest.  Let the guest handle it.
@@ -437,7 +446,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kv
 	pt_element_t guest_pte;
 	gpa_t gpa;
 
-	FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0);
+	FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
 	guest_pte = *walker.ptep;
 	FNAME(release_walker)(&walker);
 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 5/5] KVM: SVM: Propagate cpu shutdown events to userspace
  2007-01-23 14:30 [PATCH 0/5] KVM fixes for 2.6.20 Avi Kivity
                   ` (3 preceding siblings ...)
  2007-01-23 14:35 ` [PATCH 4/5] KVM: MMU: Report nx faults to the guest Avi Kivity
@ 2007-01-23 14:36 ` Avi Kivity
  4 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2007-01-23 14:36 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, akpm, mingo

From: Joerg Roedel <joerg.roedel@amd.com>

This patch implements forwarding of SHUTDOWN intercepts from the guest
on to userspace on AMD SVM. A SHUTDOWN event occurs when the guest produces
a triple fault (e.g. on reboot). This also fixes the bug that a guest
reboot actually causes a host reboot under some circumstances.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>

Index: linux-2.6/include/linux/kvm.h
===================================================================
--- linux-2.6.orig/include/linux/kvm.h
+++ linux-2.6/include/linux/kvm.h
@@ -46,6 +46,7 @@ enum kvm_exit_reason {
 	KVM_EXIT_HLT              = 5,
 	KVM_EXIT_MMIO             = 6,
 	KVM_EXIT_IRQ_WINDOW_OPEN  = 7,
+	KVM_EXIT_SHUTDOWN         = 8,
 };
 
 /* for KVM_RUN */
Index: linux-2.6/drivers/kvm/svm.c
===================================================================
--- linux-2.6.orig/drivers/kvm/svm.c
+++ linux-2.6/drivers/kvm/svm.c
@@ -502,6 +502,7 @@ static void init_vmcb(struct vmcb *vmcb)
 				(1ULL << INTERCEPT_IOIO_PROT) |
 				(1ULL << INTERCEPT_MSR_PROT) |
 				(1ULL << INTERCEPT_TASK_SWITCH) |
+				(1ULL << INTERCEPT_SHUTDOWN) |
 				(1ULL << INTERCEPT_VMRUN) |
 				(1ULL << INTERCEPT_VMMCALL) |
 				(1ULL << INTERCEPT_VMLOAD) |
@@ -892,6 +893,19 @@ static int pf_interception(struct kvm_vc
 	return 0;
 }
 
+static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	/*
+	 * VMCB is undefined after a SHUTDOWN intercept
+	 * so reinitialize it.
+	 */
+	memset(vcpu->svm->vmcb, 0, PAGE_SIZE);
+	init_vmcb(vcpu->svm->vmcb);
+
+	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+	return 0;
+}
+
 static int io_get_override(struct kvm_vcpu *vcpu,
 			  struct vmcb_seg **seg,
 			  int *addr_override)
@@ -1249,6 +1263,7 @@ static int (*svm_exit_handlers[])(struct
 	[SVM_EXIT_IOIO] 		  	= io_interception,
 	[SVM_EXIT_MSR]				= msr_interception,
 	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
+	[SVM_EXIT_SHUTDOWN]			= shutdown_interception,
 	[SVM_EXIT_VMRUN]			= invalid_op_interception,
 	[SVM_EXIT_VMMCALL]			= invalid_op_interception,
 	[SVM_EXIT_VMLOAD]			= invalid_op_interception,

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2007-01-23 14:36 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-23 14:30 [PATCH 0/5] KVM fixes for 2.6.20 Avi Kivity
2007-01-23 14:32 ` [PATCH 1/5] KVM: SVM: Fix SVM idt confusion Avi Kivity
2007-01-23 14:33 ` [PATCH 2/5] KVM: Emulate IA32_MISC_ENABLE msr Avi Kivity
2007-01-23 14:34 ` [PATCH 3/5] KVM: MMU: Perform access checks in walk_addr() Avi Kivity
2007-01-23 14:35 ` [PATCH 4/5] KVM: MMU: Report nx faults to the guest Avi Kivity
2007-01-23 14:36 ` [PATCH 5/5] KVM: SVM: Propagate cpu shutdown events to userspace Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).