* [PATCH] Activate Virtualization On Demand
@ 2009-09-09 14:18 Alexander Graf
  2009-09-14  5:05 ` Avi Kivity
  2009-09-14 13:23 ` Marcelo Tosatti
  0 siblings, 2 replies; 13+ messages in thread
From: Alexander Graf @ 2009-09-09 14:18 UTC (permalink / raw)
  To: kvm

x86 CPUs require a special setup sequence to enable their virtualization
extensions. This setup can have unpleasant side effects for users, like
blocking other VMMs from working (vmx) or using invalid TLB entries
(svm).

Currently, KVM activates virtualization as soon as the respective kernel
module is loaded. This prevents us from autoloading the KVM modules
without breaking other VMMs.

To circumvent this problem at least partially, this patch introduces
on-demand activation of virtualization: instead of enabling the
extensions at module load time, they are enabled on creation of the
first virtual machine and disabled on destruction of the last one.

With this, KVM can be easily autoloaded while keeping other hypervisors
usable.

Signed-off-by: Alexander Graf <agraf@suse.de>

--

I've tested the following:

  - shutdown
  - suspend / resume to RAM
  - running VirtualBox while kvm module is loaded
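
For quick reference, the heart of the change is the usage counter added
to virt/kvm/kvm_main.c, shown here condensed from the diff below (the
real hunks also keep the cpus_hardware_enabled mask in sync):

  static int kvm_usage_count;	/* number of live VMs */
  static atomic_t hardware_enable_failed;

  static int hardware_enable_all(void)
  {
  	int r = 0;

  	spin_lock(&kvm_lock);
  	kvm_usage_count++;
  	if (kvm_usage_count == 1) {
  		/* first VM: enable the extensions on every CPU */
  		atomic_set(&hardware_enable_failed, 0);
  		on_each_cpu(hardware_enable, NULL, 1);
  		if (atomic_read(&hardware_enable_failed))
  			r = -EBUSY;
  	}
  	spin_unlock(&kvm_lock);
  	return r;
  }

  static void hardware_disable_all(void)
  {
  	BUG_ON(!kvm_usage_count);

  	spin_lock(&kvm_lock);
  	kvm_usage_count--;
  	if (!kvm_usage_count)
  		/* last VM gone: hand the extensions back */
  		on_each_cpu(hardware_disable, NULL, 1);
  	spin_unlock(&kvm_lock);
  }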
---
 arch/ia64/kvm/kvm-ia64.c        |    8 ++-
 arch/powerpc/kvm/powerpc.c      |    3 +-
 arch/s390/kvm/kvm-s390.c        |    3 +-
 arch/x86/include/asm/kvm_host.h |    2 +-
 arch/x86/kvm/svm.c              |   13 ++++--
 arch/x86/kvm/vmx.c              |    7 +++-
 arch/x86/kvm/x86.c              |    4 +-
 include/linux/kvm_host.h        |    2 +-
 virt/kvm/kvm_main.c             |   82 +++++++++++++++++++++++++++++++++------
 9 files changed, 98 insertions(+), 26 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f6471c8..5fdeec5 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	long  status;
 	long  tmp_base;
@@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
 	local_irq_restore(saved_psr);
 	if (slot < 0)
-		return;
+		return -EINVAL;
 
 	spin_lock(&vp_lock);
 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
@@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
 	if (status != 0) {
 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return ;
+		return -EINVAL;
 	}
 
 	if (!kvm_vsa_base) {
@@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
 	}
 	spin_unlock(&vp_lock);
 	ia64_ptr_entry(0x3, slot);
+
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 95af622..5902bbc 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 00e2ce8..5445058 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static unsigned long long *facilities;
 
 /* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	/* every s390 is virtualization enabled ;-) */
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6046e6f..b17886f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -462,7 +462,7 @@ struct descriptor_table {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	void (*hardware_enable)(void *dummy);      /* __init */
+	int (*hardware_enable)(void *dummy);
 	void (*hardware_disable)(void *dummy);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a5f90c7..2f3a388 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
 	struct svm_cpu_data *svm_data;
@@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
 		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		return -EINVAL;
 	}
 	svm_data = per_cpu(svm_data, me);
 
 	if (!svm_data) {
 		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
 	svm_data->asid_generation = 1;
@@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
 	gdt = (struct desc_struct *)gdt_descr.base;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
 	wrmsrl(MSR_VM_HSAVE_PA,
 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d3213ac..c20a902 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
 	/* locked but not enabled */
 }
 
-static void hardware_enable(void *garbage)
+static int hardware_enable(void *garbage)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	u64 old;
 
+	if (read_cr4() & X86_CR4_VMXE)
+		return -EBUSY;
+
 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 	if ((old & (FEATURE_CONTROL_LOCKED |
@@ -1158,6 +1161,8 @@ static void hardware_enable(void *garbage)
 	asm volatile (ASM_VMX_VMXON_RAX
 		      : : "a"(&phys_addr), "m"(phys_addr)
 		      : "memory", "cc");
+
+	return 0;
 }
 
 static void vmclear_local_vcpus(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 891234b..ec16169 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4703,9 +4703,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
-	kvm_x86_ops->hardware_enable(garbage);
+	return kvm_x86_ops->hardware_enable(garbage);
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3e57be4..0bf9ee9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -346,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
-void kvm_arch_hardware_enable(void *garbage);
+int kvm_arch_hardware_enable(void *garbage);
 void kvm_arch_hardware_disable(void *garbage);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6ce5ef3..39f0f5e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
+static int kvm_usage_count = 0;
+static atomic_t hardware_enable_failed;
 
 struct kmem_cache *kvm_vcpu_cache;
 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
+static int hardware_enable_all(void);
+static void hardware_disable_all(void);
 
 static bool kvm_rebooting;
 
@@ -326,6 +330,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 
 static struct kvm *kvm_create_vm(void)
 {
+	int r = 0;
 	struct kvm *kvm = kvm_arch_create_vm();
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	struct page *page;
@@ -333,6 +338,11 @@ static struct kvm *kvm_create_vm(void)
 
 	if (IS_ERR(kvm))
 		goto out;
+
+	r = hardware_enable_all();
+	if (r)
+		goto out_err;
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
@@ -341,8 +351,8 @@ static struct kvm *kvm_create_vm(void)
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
-		kfree(kvm);
-		return ERR_PTR(-ENOMEM);
+		r = -ENOMEM;
+		goto out_err;
 	}
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -350,15 +360,13 @@ static struct kvm *kvm_create_vm(void)
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	{
-		int err;
 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (err) {
+		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+		if (r) {
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 			put_page(page);
 #endif
-			kfree(kvm);
-			return ERR_PTR(err);
+			goto out_err;
 		}
 	}
 #endif
@@ -382,6 +390,11 @@ static struct kvm *kvm_create_vm(void)
 #endif
 out:
 	return kvm;
+
+out_err:
+	hardware_disable_all();
+	kfree(kvm);
+	return ERR_PTR(r);
 }
 
 /*
@@ -440,6 +453,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	hardware_disable_all();
 	mmdrop(mm);
 }
 
@@ -1631,11 +1645,41 @@ static struct miscdevice kvm_dev = {
 static void hardware_enable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
+	int r;
 
 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
+
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_enable(NULL);
+
+	r = kvm_arch_hardware_enable(NULL);
+
+	if (r) {
+		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
+		atomic_inc(&hardware_enable_failed);
+		printk(KERN_INFO "kvm: enabling virtualization on "
+				 "CPU%d failed\n", cpu);
+	}
+}
+
+static int hardware_enable_all(void)
+{
+	int r = 0;
+
+	spin_lock(&kvm_lock);
+
+	kvm_usage_count++;
+	if (kvm_usage_count == 1) {
+		atomic_set(&hardware_enable_failed, 0);
+		on_each_cpu(hardware_enable, NULL, 1);
+
+		if (atomic_read(&hardware_enable_failed))
+			r = -EBUSY;
+	}
+
+	spin_unlock(&kvm_lock);
+
+	return r;
 }
 
 static void hardware_disable(void *junk)
@@ -1648,11 +1692,25 @@ static void hardware_disable(void *junk)
 	kvm_arch_hardware_disable(NULL);
 }
 
+static void hardware_disable_all(void)
+{
+	BUG_ON(!kvm_usage_count);
+
+	spin_lock(&kvm_lock);
+	kvm_usage_count--;
+	if (!kvm_usage_count)
+		on_each_cpu(hardware_disable, NULL, 1);
+	spin_unlock(&kvm_lock);
+}
+
 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 			   void *v)
 {
 	int cpu = (long)v;
 
+	if (!kvm_usage_count)
+		return NOTIFY_OK;
+
 	val &= ~CPU_TASKS_FROZEN;
 	switch (val) {
 	case CPU_DYING:
@@ -1855,13 +1913,15 @@ static void kvm_exit_debug(void)
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 {
-	hardware_disable(NULL);
+	if (kvm_usage_count)
+		hardware_disable(NULL);
 	return 0;
 }
 
 static int kvm_resume(struct sys_device *dev)
 {
-	hardware_enable(NULL);
+	if (kvm_usage_count)
+		hardware_enable(NULL);
 	return 0;
 }
 
@@ -1938,7 +1998,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 			goto out_free_1;
 	}
 
-	on_each_cpu(hardware_enable, NULL, 1);
 	r = register_cpu_notifier(&kvm_cpu_notifier);
 	if (r)
 		goto out_free_2;
@@ -1986,7 +2045,6 @@ out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
 out_free_0a:
-- 
1.6.0.2



* Re: [PATCH] Activate Virtualization On Demand
  2009-09-09 14:18 [PATCH] Activate Virtualization On Demand Alexander Graf
@ 2009-09-14  5:05 ` Avi Kivity
  2009-09-14 13:23 ` Marcelo Tosatti
  1 sibling, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2009-09-14  5:05 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm, Marcelo Tosatti

On 09/09/2009 05:18 PM, Alexander Graf wrote:
> x86 CPUs require a special setup sequence to enable their virtualization
> extensions. This setup can have unpleasant side effects for users, like
> blocking other VMMs from working (vmx) or using invalid TLB entries
> (svm).
>
> Currently, KVM activates virtualization as soon as the respective kernel
> module is loaded. This prevents us from autoloading the KVM modules
> without breaking other VMMs.
>
> To circumvent this problem at least partially, this patch introduces
> on-demand activation of virtualization: instead of enabling the
> extensions at module load time, they are enabled on creation of the
> first virtual machine and disabled on destruction of the last one.
>
> With this, KVM can be easily autoloaded while keeping other hypervisors
> usable.
>

Looks good.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



* Re: [PATCH] Activate Virtualization On Demand
  2009-09-09 14:18 [PATCH] Activate Virtualization On Demand Alexander Graf
  2009-09-14  5:05 ` Avi Kivity
@ 2009-09-14 13:23 ` Marcelo Tosatti
  2009-09-14 15:52   ` Alexander Graf
  1 sibling, 1 reply; 13+ messages in thread
From: Marcelo Tosatti @ 2009-09-14 13:23 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm

On Wed, Sep 09, 2009 at 04:18:58PM +0200, Alexander Graf wrote:
> x86 CPUs require a special setup sequence to enable their virtualization
> extensions. This setup can have unpleasant side effects for users, like
> blocking other VMMs from working (vmx) or using invalid TLB entries
> (svm).
>
> Currently, KVM activates virtualization as soon as the respective kernel
> module is loaded. This prevents us from autoloading the KVM modules
> without breaking other VMMs.
>
> To circumvent this problem at least partially, this patch introduces
> on-demand activation of virtualization: instead of enabling the
> extensions at module load time, they are enabled on creation of the
> first virtual machine and disabled on destruction of the last one.
>
> With this, KVM can be easily autoloaded while keeping other hypervisors
> usable.
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> 
> --
> 
> I've tested the following:
> 
>   - shutdown
>   - suspend / resume to RAM
>   - running VirtualBox while kvm module is loaded
> ---
>  arch/ia64/kvm/kvm-ia64.c        |    8 ++-
>  arch/powerpc/kvm/powerpc.c      |    3 +-
>  arch/s390/kvm/kvm-s390.c        |    3 +-
>  arch/x86/include/asm/kvm_host.h |    2 +-
>  arch/x86/kvm/svm.c              |   13 ++++--
>  arch/x86/kvm/vmx.c              |    7 +++-
>  arch/x86/kvm/x86.c              |    4 +-
>  include/linux/kvm_host.h        |    2 +-
>  virt/kvm/kvm_main.c             |   82 +++++++++++++++++++++++++++++++++------
>  9 files changed, 98 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index f6471c8..5fdeec5 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
>  
>  static  DEFINE_SPINLOCK(vp_lock);
>  
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
>  	long  status;
>  	long  tmp_base;
> @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
>  	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
>  	local_irq_restore(saved_psr);
>  	if (slot < 0)
> -		return;
> +		return -EINVAL;
>  
>  	spin_lock(&vp_lock);
>  	status = ia64_pal_vp_init_env(kvm_vsa_base ?
> @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
>  			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
>  	if (status != 0) {
>  		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
> -		return ;
> +		return -EINVAL;
>  	}
>  
>  	if (!kvm_vsa_base) {
> @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
>  	}
>  	spin_unlock(&vp_lock);
>  	ia64_ptr_entry(0x3, slot);
> +
> +	return 0;
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 95af622..5902bbc 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
>  	return r;
>  }
>  
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
> +	return 0;
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 00e2ce8..5445058 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>  static unsigned long long *facilities;
>  
>  /* Section: not file related */
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
>  	/* every s390 is virtualization enabled ;-) */
> +	return 0;
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6046e6f..b17886f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -462,7 +462,7 @@ struct descriptor_table {
>  struct kvm_x86_ops {
>  	int (*cpu_has_kvm_support)(void);          /* __init */
>  	int (*disabled_by_bios)(void);             /* __init */
> -	void (*hardware_enable)(void *dummy);      /* __init */
> +	int (*hardware_enable)(void *dummy);
>  	void (*hardware_disable)(void *dummy);
>  	void (*check_processor_compatibility)(void *rtn);
>  	int (*hardware_setup)(void);               /* __init */
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a5f90c7..2f3a388 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
>  	cpu_svm_disable();
>  }
>  
> -static void svm_hardware_enable(void *garbage)
> +static int svm_hardware_enable(void *garbage)
>  {
>  
>  	struct svm_cpu_data *svm_data;
> @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
>  	struct desc_struct *gdt;
>  	int me = raw_smp_processor_id();
>  
> +	rdmsrl(MSR_EFER, efer);
> +	if (efer & EFER_SVME)
> +		return -EBUSY;
> +
>  	if (!has_svm()) {
>  		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
> -		return;
> +		return -EINVAL;
>  	}
>  	svm_data = per_cpu(svm_data, me);
>  
>  	if (!svm_data) {
>  		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
>  		       me);
> -		return;
> +		return -EINVAL;
>  	}
>  
>  	svm_data->asid_generation = 1;
> @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
>  	gdt = (struct desc_struct *)gdt_descr.base;
>  	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
>  
> -	rdmsrl(MSR_EFER, efer);
>  	wrmsrl(MSR_EFER, efer | EFER_SVME);
>  
>  	wrmsrl(MSR_VM_HSAVE_PA,
>  	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
> +
> +	return 0;
>  }
>  
>  static void svm_cpu_uninit(int cpu)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index d3213ac..c20a902 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
>  	/* locked but not enabled */
>  }
>  
> -static void hardware_enable(void *garbage)
> +static int hardware_enable(void *garbage)
>  {
>  	int cpu = raw_smp_processor_id();
>  	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
>  	u64 old;
>  
> +	if (read_cr4() & X86_CR4_VMXE)
> +		return -EBUSY;
> +
>  	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
>  	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
>  	if ((old & (FEATURE_CONTROL_LOCKED |
> @@ -1158,6 +1161,8 @@ static void hardware_enable(void *garbage)
>  	asm volatile (ASM_VMX_VMXON_RAX
>  		      : : "a"(&phys_addr), "m"(phys_addr)
>  		      : "memory", "cc");
> +
> +	return 0;
>  }
>  
>  static void vmclear_local_vcpus(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 891234b..ec16169 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4703,9 +4703,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>  	return kvm_x86_ops->vcpu_reset(vcpu);
>  }
>  
> -void kvm_arch_hardware_enable(void *garbage)
> +int kvm_arch_hardware_enable(void *garbage)
>  {
> -	kvm_x86_ops->hardware_enable(garbage);
> +	return kvm_x86_ops->hardware_enable(garbage);
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 3e57be4..0bf9ee9 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -346,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
>  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
>  
>  int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
> -void kvm_arch_hardware_enable(void *garbage);
> +int kvm_arch_hardware_enable(void *garbage);
>  void kvm_arch_hardware_disable(void *garbage);
>  int kvm_arch_hardware_setup(void);
>  void kvm_arch_hardware_unsetup(void);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 6ce5ef3..39f0f5e 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
>  LIST_HEAD(vm_list);
>  
>  static cpumask_var_t cpus_hardware_enabled;
> +static int kvm_usage_count = 0;
> +static atomic_t hardware_enable_failed;
>  
>  struct kmem_cache *kvm_vcpu_cache;
>  EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
> @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
>  
>  static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
>  			   unsigned long arg);
> +static int hardware_enable_all(void);
> +static void hardware_disable_all(void);
>  
>  static bool kvm_rebooting;
>  
> @@ -326,6 +330,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
>  
>  static struct kvm *kvm_create_vm(void)
>  {
> +	int r = 0;
>  	struct kvm *kvm = kvm_arch_create_vm();
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  	struct page *page;
> @@ -333,6 +338,11 @@ static struct kvm *kvm_create_vm(void)
>  
>  	if (IS_ERR(kvm))
>  		goto out;
> +
> +	r = hardware_enable_all();
> +	if (r)
> +		goto out_err;
> +
>  #ifdef CONFIG_HAVE_KVM_IRQCHIP
>  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>  	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
> @@ -341,8 +351,8 @@ static struct kvm *kvm_create_vm(void)
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>  	if (!page) {
> -		kfree(kvm);
> -		return ERR_PTR(-ENOMEM);
> +		r = -ENOMEM;
> +		goto out_err;
>  	}
>  	kvm->coalesced_mmio_ring =
>  			(struct kvm_coalesced_mmio_ring *)page_address(page);
> @@ -350,15 +360,13 @@ static struct kvm *kvm_create_vm(void)
>  
>  #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>  	{
> -		int err;
>  		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
> -		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
> -		if (err) {
> +		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
> +		if (r) {
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  			put_page(page);
>  #endif
> -			kfree(kvm);
> -			return ERR_PTR(err);
> +			goto out_err;
>  		}
>  	}
>  #endif
> @@ -382,6 +390,11 @@ static struct kvm *kvm_create_vm(void)
>  #endif
>  out:
>  	return kvm;
> +
> +out_err:
> +	hardware_disable_all();
> +	kfree(kvm);
> +	return ERR_PTR(r);
>  }
>  
>  /*
> @@ -440,6 +453,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>  	kvm_arch_flush_shadow(kvm);
>  #endif
>  	kvm_arch_destroy_vm(kvm);
> +	hardware_disable_all();
>  	mmdrop(mm);
>  }
>  
> @@ -1631,11 +1645,41 @@ static struct miscdevice kvm_dev = {
>  static void hardware_enable(void *junk)
>  {
>  	int cpu = raw_smp_processor_id();
> +	int r;
>  
>  	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
>  		return;
> +
>  	cpumask_set_cpu(cpu, cpus_hardware_enabled);
> -	kvm_arch_hardware_enable(NULL);
> +
> +	r = kvm_arch_hardware_enable(NULL);
> +
> +	if (r) {
> +		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
> +		atomic_inc(&hardware_enable_failed);
> +		printk(KERN_INFO "kvm: enabling virtualization on "
> +				 "CPU%d failed\n", cpu);
> +	}
> +}
> +
> +static int hardware_enable_all(void)
> +{
> +	int r = 0;
> +
> +	spin_lock(&kvm_lock);
> +
> +	kvm_usage_count++;
> +	if (kvm_usage_count == 1) {
> +		atomic_set(&hardware_enable_failed, 0);
> +		on_each_cpu(hardware_enable, NULL, 1);
> +
> +		if (atomic_read(&hardware_enable_failed))
> +			r = -EBUSY;
> +	}
> +
> +	spin_unlock(&kvm_lock);
> +
> +	return r;
>  }

I think the kvm_usage_count > 1 path should also test for
hardware_enable_failed (you assume that if kvm_usage_count > 1 
then hardware enablement has succeeded, which is not always true).
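
Something like this, perhaps (untested sketch, not part of the patch as
posted):

	kvm_usage_count++;
	if (kvm_usage_count == 1) {
		atomic_set(&hardware_enable_failed, 0);
		on_each_cpu(hardware_enable, NULL, 1);
	}
	/* also catch callers racing with a failed first enable that
	   has not been rolled back yet */
	if (atomic_read(&hardware_enable_failed))
		r = -EBUSY;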

Also, better move vmx.c's ept_sync_global from vmx_init to 
hardware_enable.



* Re: [PATCH] Activate Virtualization On Demand
  2009-09-14 13:23 ` Marcelo Tosatti
@ 2009-09-14 15:52   ` Alexander Graf
  2009-09-14 16:14     ` Marcelo Tosatti
  0 siblings, 1 reply; 13+ messages in thread
From: Alexander Graf @ 2009-09-14 15:52 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm


On 14.09.2009, at 15:23, Marcelo Tosatti wrote:

> On Wed, Sep 09, 2009 at 04:18:58PM +0200, Alexander Graf wrote:
>> x86 CPUs require a special setup sequence to enable their
>> virtualization extensions. This setup can have unpleasant side
>> effects for users, like blocking other VMMs from working (vmx) or
>> using invalid TLB entries (svm).
>>
>> Currently, KVM activates virtualization as soon as the respective
>> kernel module is loaded. This prevents us from autoloading the KVM
>> modules without breaking other VMMs.
>>
>> To circumvent this problem at least partially, this patch introduces
>> on-demand activation of virtualization: instead of enabling the
>> extensions at module load time, they are enabled on creation of the
>> first virtual machine and disabled on destruction of the last one.
>>
>> With this, KVM can be easily autoloaded while keeping other
>> hypervisors usable.
>>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>
>> --
>>
>> I've tested the following:
>>
>>  - shutdown
>>  - suspend / resume to RAM
>>  - running VirtualBox while kvm module is loaded
>> ---
>> arch/ia64/kvm/kvm-ia64.c        |    8 ++-
>> arch/powerpc/kvm/powerpc.c      |    3 +-
>> arch/s390/kvm/kvm-s390.c        |    3 +-
>> arch/x86/include/asm/kvm_host.h |    2 +-
>> arch/x86/kvm/svm.c              |   13 ++++--
>> arch/x86/kvm/vmx.c              |    7 +++-
>> arch/x86/kvm/x86.c              |    4 +-
>> include/linux/kvm_host.h        |    2 +-
>> virt/kvm/kvm_main.c             |   82 +++++++++++++++++++++++++++++++++------
>> 9 files changed, 98 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
>> index f6471c8..5fdeec5 100644
>> --- a/arch/ia64/kvm/kvm-ia64.c
>> +++ b/arch/ia64/kvm/kvm-ia64.c
>> @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64  
>> *host_iva, u64 *opt_handler)
>>
>> static  DEFINE_SPINLOCK(vp_lock);
>>
>> -void kvm_arch_hardware_enable(void *garbage)
>> +int kvm_arch_hardware_enable(void *garbage)
>> {
>> 	long  status;
>> 	long  tmp_base;
>> @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
>> 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
>> 	local_irq_restore(saved_psr);
>> 	if (slot < 0)
>> -		return;
>> +		return -EINVAL;
>>
>> 	spin_lock(&vp_lock);
>> 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
>> @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
>> 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
>> 	if (status != 0) {
>> 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
>> -		return ;
>> +		return -EINVAL;
>> 	}
>>
>> 	if (!kvm_vsa_base) {
>> @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
>> 	}
>> 	spin_unlock(&vp_lock);
>> 	ia64_ptr_entry(0x3, slot);
>> +
>> +	return 0;
>> }
>>
>> void kvm_arch_hardware_disable(void *garbage)
>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>> index 95af622..5902bbc 100644
>> --- a/arch/powerpc/kvm/powerpc.c
>> +++ b/arch/powerpc/kvm/powerpc.c
>> @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run,  
>> struct kvm_vcpu *vcpu)
>> 	return r;
>> }
>>
>> -void kvm_arch_hardware_enable(void *garbage)
>> +int kvm_arch_hardware_enable(void *garbage)
>> {
>> +	return 0;
>> }
>>
>> void kvm_arch_hardware_disable(void *garbage)
>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>> index 00e2ce8..5445058 100644
>> --- a/arch/s390/kvm/kvm-s390.c
>> +++ b/arch/s390/kvm/kvm-s390.c
>> @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[]  
>> = {
>> static unsigned long long *facilities;
>>
>> /* Section: not file related */
>> -void kvm_arch_hardware_enable(void *garbage)
>> +int kvm_arch_hardware_enable(void *garbage)
>> {
>> 	/* every s390 is virtualization enabled ;-) */
>> +	return 0;
>> }
>>
>> void kvm_arch_hardware_disable(void *garbage)
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 6046e6f..b17886f 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -462,7 +462,7 @@ struct descriptor_table {
>> struct kvm_x86_ops {
>> 	int (*cpu_has_kvm_support)(void);          /* __init */
>> 	int (*disabled_by_bios)(void);             /* __init */
>> -	void (*hardware_enable)(void *dummy);      /* __init */
>> +	int (*hardware_enable)(void *dummy);
>> 	void (*hardware_disable)(void *dummy);
>> 	void (*check_processor_compatibility)(void *rtn);
>> 	int (*hardware_setup)(void);               /* __init */
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index a5f90c7..2f3a388 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
>> 	cpu_svm_disable();
>> }
>>
>> -static void svm_hardware_enable(void *garbage)
>> +static int svm_hardware_enable(void *garbage)
>> {
>>
>> 	struct svm_cpu_data *svm_data;
>> @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
>> 	struct desc_struct *gdt;
>> 	int me = raw_smp_processor_id();
>>
>> +	rdmsrl(MSR_EFER, efer);
>> +	if (efer & EFER_SVME)
>> +		return -EBUSY;
>> +
>> 	if (!has_svm()) {
>> 		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
>> -		return;
>> +		return -EINVAL;
>> 	}
>> 	svm_data = per_cpu(svm_data, me);
>>
>> 	if (!svm_data) {
>> 		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
>> 		       me);
>> -		return;
>> +		return -EINVAL;
>> 	}
>>
>> 	svm_data->asid_generation = 1;
>> @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
>> 	gdt = (struct desc_struct *)gdt_descr.base;
>> 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt +  
>> GDT_ENTRY_TSS);
>>
>> -	rdmsrl(MSR_EFER, efer);
>> 	wrmsrl(MSR_EFER, efer | EFER_SVME);
>>
>> 	wrmsrl(MSR_VM_HSAVE_PA,
>> 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
>> +
>> +	return 0;
>> }
>>
>> static void svm_cpu_uninit(int cpu)
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index d3213ac..c20a902 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
>> 	/* locked but not enabled */
>> }
>>
>> -static void hardware_enable(void *garbage)
>> +static int hardware_enable(void *garbage)
>> {
>> 	int cpu = raw_smp_processor_id();
>> 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
>> 	u64 old;
>>
>> +	if (read_cr4() & X86_CR4_VMXE)
>> +		return -EBUSY;
>> +
>> 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
>> 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
>> 	if ((old & (FEATURE_CONTROL_LOCKED |
>> @@ -1158,6 +1161,8 @@ static void hardware_enable(void *garbage)
>> 	asm volatile (ASM_VMX_VMXON_RAX
>> 		      : : "a"(&phys_addr), "m"(phys_addr)
>> 		      : "memory", "cc");
>> +
>> +	return 0;
>> }
>>
>> static void vmclear_local_vcpus(void)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 891234b..ec16169 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -4703,9 +4703,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>> 	return kvm_x86_ops->vcpu_reset(vcpu);
>> }
>>
>> -void kvm_arch_hardware_enable(void *garbage)
>> +int kvm_arch_hardware_enable(void *garbage)
>> {
>> -	kvm_x86_ops->hardware_enable(garbage);
>> +	return kvm_x86_ops->hardware_enable(garbage);
>> }
>>
>> void kvm_arch_hardware_disable(void *garbage)
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 3e57be4..0bf9ee9 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -346,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
>> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
>>
>> int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
>> -void kvm_arch_hardware_enable(void *garbage);
>> +int kvm_arch_hardware_enable(void *garbage);
>> void kvm_arch_hardware_disable(void *garbage);
>> int kvm_arch_hardware_setup(void);
>> void kvm_arch_hardware_unsetup(void);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 6ce5ef3..39f0f5e 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
>> LIST_HEAD(vm_list);
>>
>> static cpumask_var_t cpus_hardware_enabled;
>> +static int kvm_usage_count = 0;
>> +static atomic_t hardware_enable_failed;
>>
>> struct kmem_cache *kvm_vcpu_cache;
>> EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
>> @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
>>
>> static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
>> 			   unsigned long arg);
>> +static int hardware_enable_all(void);
>> +static void hardware_disable_all(void);
>>
>> static bool kvm_rebooting;
>>
>> @@ -326,6 +330,7 @@ static const struct mmu_notifier_ops  
>> kvm_mmu_notifier_ops = {
>>
>> static struct kvm *kvm_create_vm(void)
>> {
>> +	int r = 0;
>> 	struct kvm *kvm = kvm_arch_create_vm();
>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>> 	struct page *page;
>> @@ -333,6 +338,11 @@ static struct kvm *kvm_create_vm(void)
>>
>> 	if (IS_ERR(kvm))
>> 		goto out;
>> +
>> +	r = hardware_enable_all();
>> +	if (r)
>> +		goto out_err;
>> +
>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
>> 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>> 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
>> @@ -341,8 +351,8 @@ static struct kvm *kvm_create_vm(void)
>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>> 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> 	if (!page) {
>> -		kfree(kvm);
>> -		return ERR_PTR(-ENOMEM);
>> +		r = -ENOMEM;
>> +		goto out_err;
>> 	}
>> 	kvm->coalesced_mmio_ring =
>> 			(struct kvm_coalesced_mmio_ring *)page_address(page);
>> @@ -350,15 +360,13 @@ static struct kvm *kvm_create_vm(void)
>>
>> #if defined(CONFIG_MMU_NOTIFIER) &&  
>> defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>> 	{
>> -		int err;
>> 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>> -		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>> -		if (err) {
>> +		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>> +		if (r) {
>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>> 			put_page(page);
>> #endif
>> -			kfree(kvm);
>> -			return ERR_PTR(err);
>> +			goto out_err;
>> 		}
>> 	}
>> #endif
>> @@ -382,6 +390,11 @@ static struct kvm *kvm_create_vm(void)
>> #endif
>> out:
>> 	return kvm;
>> +
>> +out_err:
>> +	hardware_disable_all();
>> +	kfree(kvm);
>> +	return ERR_PTR(r);
>> }
>>
>> /*
>> @@ -440,6 +453,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>> 	kvm_arch_flush_shadow(kvm);
>> #endif
>> 	kvm_arch_destroy_vm(kvm);
>> +	hardware_disable_all();
>> 	mmdrop(mm);
>> }
>>
>> @@ -1631,11 +1645,41 @@ static struct miscdevice kvm_dev = {
>> static void hardware_enable(void *junk)
>> {
>> 	int cpu = raw_smp_processor_id();
>> +	int r;
>>
>> 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
>> 		return;
>> +
>> 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
>> -	kvm_arch_hardware_enable(NULL);
>> +
>> +	r = kvm_arch_hardware_enable(NULL);
>> +
>> +	if (r) {
>> +		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
>> +		atomic_inc(&hardware_enable_failed);
>> +		printk(KERN_INFO "kvm: enabling virtualization on "
>> +				 "CPU%d failed\n", cpu);
>> +	}
>> +}
>> +
>> +static int hardware_enable_all(void)
>> +{
>> +	int r = 0;
>> +
>> +	spin_lock(&kvm_lock);
>> +
>> +	kvm_usage_count++;
>> +	if (kvm_usage_count == 1) {
>> +		atomic_set(&hardware_enable_failed, 0);
>> +		on_each_cpu(hardware_enable, NULL, 1);
>> +
>> +		if (atomic_read(&hardware_enable_failed))
>> +			r = -EBUSY;
>> +	}
>> +
>> +	spin_unlock(&kvm_lock);
>> +
>> +	return r;
>> }
>
> I think the kvm_usage_count > 1 path should also test for
> hardware_enable_failed (you assume that if kvm_usage_count > 1
> then hardware enablement has succeeded, which is not always true).

I don't see how kvm_usage_count could be > 1 without hardware  
enablement having succeeded. The hardware_enable_all caller calls  
hardware_disable_all (kvm_usage_count--) when enabling fails.

> Also, better move vmx.c's ept_sync_global from vmx_init to
> hardware_enable.

Why? What does that do?


* Re: [PATCH] Activate Virtualization On Demand
  2009-09-14 15:52   ` Alexander Graf
@ 2009-09-14 16:14     ` Marcelo Tosatti
  2009-09-14 16:25       ` Alexander Graf
  0 siblings, 1 reply; 13+ messages in thread
From: Marcelo Tosatti @ 2009-09-14 16:14 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm

On Mon, Sep 14, 2009 at 05:52:48PM +0200, Alexander Graf wrote:
>
> On 14.09.2009, at 15:23, Marcelo Tosatti wrote:
>
>> On Wed, Sep 09, 2009 at 04:18:58PM +0200, Alexander Graf wrote:
>>> x86 CPUs require a special setup sequence to enable their
>>> virtualization extensions. This setup can have unpleasant side
>>> effects for users, like blocking other VMMs from working (vmx) or
>>> using invalid TLB entries (svm).
>>>
>>> Currently, KVM activates virtualization as soon as the respective
>>> kernel module is loaded. This prevents us from autoloading the KVM
>>> modules without breaking other VMMs.
>>>
>>> To circumvent this problem at least partially, this patch introduces
>>> on-demand activation of virtualization: instead of enabling the
>>> extensions at module load time, they are enabled on creation of the
>>> first virtual machine and disabled on destruction of the last one.
>>>
>>> With this, KVM can be easily autoloaded while keeping other
>>> hypervisors usable.
>>>
>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>
>>> --
>>>
>>> I've tested the following:
>>>
>>>  - shutdown
>>>  - suspend / resume to RAM
>>>  - running VirtualBox while kvm module is loaded
>>> ---
>>> arch/ia64/kvm/kvm-ia64.c        |    8 ++-
>>> arch/powerpc/kvm/powerpc.c      |    3 +-
>>> arch/s390/kvm/kvm-s390.c        |    3 +-
>>> arch/x86/include/asm/kvm_host.h |    2 +-
>>> arch/x86/kvm/svm.c              |   13 ++++--
>>> arch/x86/kvm/vmx.c              |    7 +++-
>>> arch/x86/kvm/x86.c              |    4 +-
>>> include/linux/kvm_host.h        |    2 +-
>>> virt/kvm/kvm_main.c             |   82 +++++++++++++++++++++++++++++++++------
>>> 9 files changed, 98 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
>>> index f6471c8..5fdeec5 100644
>>> --- a/arch/ia64/kvm/kvm-ia64.c
>>> +++ b/arch/ia64/kvm/kvm-ia64.c
>>> @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, 
>>> u64 *opt_handler)
>>>
>>> static  DEFINE_SPINLOCK(vp_lock);
>>>
>>> -void kvm_arch_hardware_enable(void *garbage)
>>> +int kvm_arch_hardware_enable(void *garbage)
>>> {
>>> 	long  status;
>>> 	long  tmp_base;
>>> @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
>>> 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
>>> 	local_irq_restore(saved_psr);
>>> 	if (slot < 0)
>>> -		return;
>>> +		return -EINVAL;
>>>
>>> 	spin_lock(&vp_lock);
>>> 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
>>> @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
>>> 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
>>> 	if (status != 0) {
>>> 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
>>> -		return ;
>>> +		return -EINVAL;
>>> 	}
>>>
>>> 	if (!kvm_vsa_base) {
>>> @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
>>> 	}
>>> 	spin_unlock(&vp_lock);
>>> 	ia64_ptr_entry(0x3, slot);
>>> +
>>> +	return 0;
>>> }
>>>
>>> void kvm_arch_hardware_disable(void *garbage)
>>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>>> index 95af622..5902bbc 100644
>>> --- a/arch/powerpc/kvm/powerpc.c
>>> +++ b/arch/powerpc/kvm/powerpc.c
>>> @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run,  
>>> struct kvm_vcpu *vcpu)
>>> 	return r;
>>> }
>>>
>>> -void kvm_arch_hardware_enable(void *garbage)
>>> +int kvm_arch_hardware_enable(void *garbage)
>>> {
>>> +	return 0;
>>> }
>>>
>>> void kvm_arch_hardware_disable(void *garbage)
>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>> index 00e2ce8..5445058 100644
>>> --- a/arch/s390/kvm/kvm-s390.c
>>> +++ b/arch/s390/kvm/kvm-s390.c
>>> @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = 
>>> {
>>> static unsigned long long *facilities;
>>>
>>> /* Section: not file related */
>>> -void kvm_arch_hardware_enable(void *garbage)
>>> +int kvm_arch_hardware_enable(void *garbage)
>>> {
>>> 	/* every s390 is virtualization enabled ;-) */
>>> +	return 0;
>>> }
>>>
>>> void kvm_arch_hardware_disable(void *garbage)
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 6046e6f..b17886f 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -462,7 +462,7 @@ struct descriptor_table {
>>> struct kvm_x86_ops {
>>> 	int (*cpu_has_kvm_support)(void);          /* __init */
>>> 	int (*disabled_by_bios)(void);             /* __init */
>>> -	void (*hardware_enable)(void *dummy);      /* __init */
>>> +	int (*hardware_enable)(void *dummy);
>>> 	void (*hardware_disable)(void *dummy);
>>> 	void (*check_processor_compatibility)(void *rtn);
>>> 	int (*hardware_setup)(void);               /* __init */
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index a5f90c7..2f3a388 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
>>> 	cpu_svm_disable();
>>> }
>>>
>>> -static void svm_hardware_enable(void *garbage)
>>> +static int svm_hardware_enable(void *garbage)
>>> {
>>>
>>> 	struct svm_cpu_data *svm_data;
>>> @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
>>> 	struct desc_struct *gdt;
>>> 	int me = raw_smp_processor_id();
>>>
>>> +	rdmsrl(MSR_EFER, efer);
>>> +	if (efer & EFER_SVME)
>>> +		return -EBUSY;
>>> +
>>> 	if (!has_svm()) {
>>> 		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
>>> -		return;
>>> +		return -EINVAL;
>>> 	}
>>> 	svm_data = per_cpu(svm_data, me);
>>>
>>> 	if (!svm_data) {
>>> 		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
>>> 		       me);
>>> -		return;
>>> +		return -EINVAL;
>>> 	}
>>>
>>> 	svm_data->asid_generation = 1;
>>> @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
>>> 	gdt = (struct desc_struct *)gdt_descr.base;
>>> 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt +  
>>> GDT_ENTRY_TSS);
>>>
>>> -	rdmsrl(MSR_EFER, efer);
>>> 	wrmsrl(MSR_EFER, efer | EFER_SVME);
>>>
>>> 	wrmsrl(MSR_VM_HSAVE_PA,
>>> 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
>>> +
>>> +	return 0;
>>> }
>>>
>>> static void svm_cpu_uninit(int cpu)
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index d3213ac..c20a902 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
>>> 	/* locked but not enabled */
>>> }
>>>
>>> -static void hardware_enable(void *garbage)
>>> +static int hardware_enable(void *garbage)
>>> {
>>> 	int cpu = raw_smp_processor_id();
>>> 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
>>> 	u64 old;
>>>
>>> +	if (read_cr4() & X86_CR4_VMXE)
>>> +		return -EBUSY;
>>> +
>>> 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
>>> 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
>>> 	if ((old & (FEATURE_CONTROL_LOCKED |
>>> @@ -1158,6 +1161,8 @@ static void hardware_enable(void *garbage)
>>> 	asm volatile (ASM_VMX_VMXON_RAX
>>> 		      : : "a"(&phys_addr), "m"(phys_addr)
>>> 		      : "memory", "cc");
>>> +
>>> +	return 0;
>>> }
>>>
>>> static void vmclear_local_vcpus(void)
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 891234b..ec16169 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -4703,9 +4703,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>>> 	return kvm_x86_ops->vcpu_reset(vcpu);
>>> }
>>>
>>> -void kvm_arch_hardware_enable(void *garbage)
>>> +int kvm_arch_hardware_enable(void *garbage)
>>> {
>>> -	kvm_x86_ops->hardware_enable(garbage);
>>> +	return kvm_x86_ops->hardware_enable(garbage);
>>> }
>>>
>>> void kvm_arch_hardware_disable(void *garbage)
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index 3e57be4..0bf9ee9 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -346,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
>>> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
>>>
>>> int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
>>> -void kvm_arch_hardware_enable(void *garbage);
>>> +int kvm_arch_hardware_enable(void *garbage);
>>> void kvm_arch_hardware_disable(void *garbage);
>>> int kvm_arch_hardware_setup(void);
>>> void kvm_arch_hardware_unsetup(void);
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index 6ce5ef3..39f0f5e 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
>>> LIST_HEAD(vm_list);
>>>
>>> static cpumask_var_t cpus_hardware_enabled;
>>> +static int kvm_usage_count = 0;
>>> +static atomic_t hardware_enable_failed;
>>>
>>> struct kmem_cache *kvm_vcpu_cache;
>>> EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
>>> @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
>>>
>>> static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
>>> 			   unsigned long arg);
>>> +static int hardware_enable_all(void);
>>> +static void hardware_disable_all(void);
>>>
>>> static bool kvm_rebooting;
>>>
>>> @@ -326,6 +330,7 @@ static const struct mmu_notifier_ops  
>>> kvm_mmu_notifier_ops = {
>>>
>>> static struct kvm *kvm_create_vm(void)
>>> {
>>> +	int r = 0;
>>> 	struct kvm *kvm = kvm_arch_create_vm();
>>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>>> 	struct page *page;
>>> @@ -333,6 +338,11 @@ static struct kvm *kvm_create_vm(void)
>>>
>>> 	if (IS_ERR(kvm))
>>> 		goto out;
>>> +
>>> +	r = hardware_enable_all();
>>> +	if (r)
>>> +		goto out_err;
>>> +
>>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>> 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>>> 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
>>> @@ -341,8 +351,8 @@ static struct kvm *kvm_create_vm(void)
>>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>>> 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>>> 	if (!page) {
>>> -		kfree(kvm);
>>> -		return ERR_PTR(-ENOMEM);
>>> +		r = -ENOMEM;
>>> +		goto out_err;
>>> 	}
>>> 	kvm->coalesced_mmio_ring =
>>> 			(struct kvm_coalesced_mmio_ring *)page_address(page);
>>> @@ -350,15 +360,13 @@ static struct kvm *kvm_create_vm(void)
>>>
>>> #if defined(CONFIG_MMU_NOTIFIER) &&  
>>> defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>>> 	{
>>> -		int err;
>>> 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>>> -		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>>> -		if (err) {
>>> +		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>>> +		if (r) {
>>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>>> 			put_page(page);
>>> #endif
>>> -			kfree(kvm);
>>> -			return ERR_PTR(err);
>>> +			goto out_err;
>>> 		}
>>> 	}
>>> #endif
>>> @@ -382,6 +390,11 @@ static struct kvm *kvm_create_vm(void)
>>> #endif
>>> out:
>>> 	return kvm;
>>> +
>>> +out_err:
>>> +	hardware_disable_all();
>>> +	kfree(kvm);
>>> +	return ERR_PTR(r);
>>> }
>>>
>>> /*
>>> @@ -440,6 +453,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>>> 	kvm_arch_flush_shadow(kvm);
>>> #endif
>>> 	kvm_arch_destroy_vm(kvm);
>>> +	hardware_disable_all();
>>> 	mmdrop(mm);
>>> }
>>>
>>> @@ -1631,11 +1645,41 @@ static struct miscdevice kvm_dev = {
>>> static void hardware_enable(void *junk)
>>> {
>>> 	int cpu = raw_smp_processor_id();
>>> +	int r;
>>>
>>> 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
>>> 		return;
>>> +
>>> 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
>>> -	kvm_arch_hardware_enable(NULL);
>>> +
>>> +	r = kvm_arch_hardware_enable(NULL);
>>> +
>>> +	if (r) {
>>> +		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
>>> +		atomic_inc(&hardware_enable_failed);
>>> +		printk(KERN_INFO "kvm: enabling virtualization on "
>>> +				 "CPU%d failed\n", cpu);
>>> +	}
>>> +}
>>> +
>>> +static int hardware_enable_all(void)
>>> +{
>>> +	int r = 0;
>>> +
>>> +	spin_lock(&kvm_lock);
>>> +
>>> +	kvm_usage_count++;
>>> +	if (kvm_usage_count == 1) {
>>> +		atomic_set(&hardware_enable_failed, 0);
>>> +		on_each_cpu(hardware_enable, NULL, 1);
>>> +
>>> +		if (atomic_read(&hardware_enable_failed))
>>> +			r = -EBUSY;
>>> +	}
>>> +
>>> +	spin_unlock(&kvm_lock);
>>> +
>>> +	return r;
>>> }
>>
>> I think the kvm_usage_count > 1 path should also test for
>> hardware_enable_failed (you assume that if kvm_usage_count > 1
>> then hardware enablement has succeeded, which is not always true).
>
> I don't see how kvm_usage_count could be > 1 without hardware enablement 
> having succeeded. The hardware_enable_all caller calls  
> hardware_disable_all (kvm_usage_count--) when enabling fails.

But it does not hold any lock in between hardware_enable_all
and hardware_disable_all.

So it's unsafe if another kvm_create_vm call happens in between, while
kvm_usage_count is 1?
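
A hypothetical interleaving, just to illustrate (both tasks are
kvm_create_vm callers):

	/* task A */                        /* task B */
	hardware_enable_all()
	  kvm_usage_count: 0 -> 1
	  on_each_cpu(hardware_enable)
	    -> fails on one CPU, r = -EBUSY
	  kvm_lock dropped
	                                    hardware_enable_all()
	                                      kvm_usage_count: 1 -> 2
	                                      count != 1, nothing checked
	                                      returns 0
	hardware_disable_all()
	  kvm_usage_count: 2 -> 1
	  count != 0, nothing disabled

	/* B's VM now runs with virtualization only partially enabled */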

>> Also, better move vmx.c's ept_sync_global from vmx_init to
>> hardware_enable.
>
> Why? What does that do?

25.3.3.4 Guidelines for Use of the INVEPT Instruction

Software can use the INVEPT instruction with the “all-context”
INVEPT type immediately after execution of the VMXON instruction or
immediately prior to execution of the VMXOFF instruction. Either
prevents potentially undesired retention of information cached from EPT
paging structures between separate uses of VMX operation.
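
A rough sketch of that move (untested; assumes vmx.c's existing
enable_ept flag and ept_sync_global() helper):

	static int hardware_enable(void *garbage)
	{
		...
		asm volatile (ASM_VMX_VMXON_RAX
			      : : "a"(&phys_addr), "m"(phys_addr)
			      : "memory", "cc");

		/* flush cached EPT mappings right after VMXON, per the
		 * "all-context" INVEPT guideline quoted above */
		if (enable_ept)
			ept_sync_global();

		return 0;
	}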


* Re: [PATCH] Activate Virtualization On Demand
  2009-09-14 16:14     ` Marcelo Tosatti
@ 2009-09-14 16:25       ` Alexander Graf
  2009-09-14 16:46         ` Marcelo Tosatti
  0 siblings, 1 reply; 13+ messages in thread
From: Alexander Graf @ 2009-09-14 16:25 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm


On 14.09.2009, at 18:14, Marcelo Tosatti wrote:

> On Mon, Sep 14, 2009 at 05:52:48PM +0200, Alexander Graf wrote:
>>
>> On 14.09.2009, at 15:23, Marcelo Tosatti wrote:
>>
>>> On Wed, Sep 09, 2009 at 04:18:58PM +0200, Alexander Graf wrote:
>>>> x86 CPUs require a special setup sequence to enable their
>>>> virtualization extensions. This setup can have unpleasant side
>>>> effects for users, like blocking other VMMs from working (vmx) or
>>>> using invalid TLB entries (svm).
>>>>
>>>> Currently, KVM activates virtualization as soon as the respective
>>>> kernel module is loaded. This prevents us from autoloading the KVM
>>>> modules without breaking other VMMs.
>>>>
>>>> To circumvent this problem at least partially, this patch introduces
>>>> on-demand activation of virtualization: instead of enabling the
>>>> extensions at module load time, they are enabled on creation of the
>>>> first virtual machine and disabled on destruction of the last one.
>>>>
>>>> With this, KVM can be easily autoloaded while keeping other
>>>> hypervisors usable.
>>>>
>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>>
>>>> --
>>>>
>>>> I've tested the following:
>>>>
>>>> - shutdown
>>>> - suspend / resume to RAM
>>>> - running VirtualBox while kvm module is loaded
>>>> ---
>>>> arch/ia64/kvm/kvm-ia64.c        |    8 ++-
>>>> arch/powerpc/kvm/powerpc.c      |    3 +-
>>>> arch/s390/kvm/kvm-s390.c        |    3 +-
>>>> arch/x86/include/asm/kvm_host.h |    2 +-
>>>> arch/x86/kvm/svm.c              |   13 ++++--
>>>> arch/x86/kvm/vmx.c              |    7 +++-
>>>> arch/x86/kvm/x86.c              |    4 +-
>>>> include/linux/kvm_host.h        |    2 +-
>>>> virt/kvm/kvm_main.c             |   82 +++++++++++++++++++++++++++++++++------
>>>> 9 files changed, 98 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
>>>> index f6471c8..5fdeec5 100644
>>>> --- a/arch/ia64/kvm/kvm-ia64.c
>>>> +++ b/arch/ia64/kvm/kvm-ia64.c
>>>> @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64  
>>>> *host_iva,
>>>> u64 *opt_handler)
>>>>
>>>> static  DEFINE_SPINLOCK(vp_lock);
>>>>
>>>> -void kvm_arch_hardware_enable(void *garbage)
>>>> +int kvm_arch_hardware_enable(void *garbage)
>>>> {
>>>> 	long  status;
>>>> 	long  tmp_base;
>>>> @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
>>>> 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
>>>> 	local_irq_restore(saved_psr);
>>>> 	if (slot < 0)
>>>> -		return;
>>>> +		return -EINVAL;
>>>>
>>>> 	spin_lock(&vp_lock);
>>>> 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
>>>> @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
>>>> 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
>>>> 	if (status != 0) {
>>>> 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
>>>> -		return ;
>>>> +		return -EINVAL;
>>>> 	}
>>>>
>>>> 	if (!kvm_vsa_base) {
>>>> @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
>>>> 	}
>>>> 	spin_unlock(&vp_lock);
>>>> 	ia64_ptr_entry(0x3, slot);
>>>> +
>>>> +	return 0;
>>>> }
>>>>
>>>> void kvm_arch_hardware_disable(void *garbage)
>>>> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
>>>> index 95af622..5902bbc 100644
>>>> --- a/arch/powerpc/kvm/powerpc.c
>>>> +++ b/arch/powerpc/kvm/powerpc.c
>>>> @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run,
>>>> struct kvm_vcpu *vcpu)
>>>> 	return r;
>>>> }
>>>>
>>>> -void kvm_arch_hardware_enable(void *garbage)
>>>> +int kvm_arch_hardware_enable(void *garbage)
>>>> {
>>>> +	return 0;
>>>> }
>>>>
>>>> void kvm_arch_hardware_disable(void *garbage)
>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>> index 00e2ce8..5445058 100644
>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>> @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item  
>>>> debugfs_entries[] =
>>>> {
>>>> static unsigned long long *facilities;
>>>>
>>>> /* Section: not file related */
>>>> -void kvm_arch_hardware_enable(void *garbage)
>>>> +int kvm_arch_hardware_enable(void *garbage)
>>>> {
>>>> 	/* every s390 is virtualization enabled ;-) */
>>>> +	return 0;
>>>> }
>>>>
>>>> void kvm_arch_hardware_disable(void *garbage)
>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>> index 6046e6f..b17886f 100644
>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>> @@ -462,7 +462,7 @@ struct descriptor_table {
>>>> struct kvm_x86_ops {
>>>> 	int (*cpu_has_kvm_support)(void);          /* __init */
>>>> 	int (*disabled_by_bios)(void);             /* __init */
>>>> -	void (*hardware_enable)(void *dummy);      /* __init */
>>>> +	int (*hardware_enable)(void *dummy);
>>>> 	void (*hardware_disable)(void *dummy);
>>>> 	void (*check_processor_compatibility)(void *rtn);
>>>> 	int (*hardware_setup)(void);               /* __init */
>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>> index a5f90c7..2f3a388 100644
>>>> --- a/arch/x86/kvm/svm.c
>>>> +++ b/arch/x86/kvm/svm.c
>>>> @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
>>>> 	cpu_svm_disable();
>>>> }
>>>>
>>>> -static void svm_hardware_enable(void *garbage)
>>>> +static int svm_hardware_enable(void *garbage)
>>>> {
>>>>
>>>> 	struct svm_cpu_data *svm_data;
>>>> @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
>>>> 	struct desc_struct *gdt;
>>>> 	int me = raw_smp_processor_id();
>>>>
>>>> +	rdmsrl(MSR_EFER, efer);
>>>> +	if (efer & EFER_SVME)
>>>> +		return -EBUSY;
>>>> +
>>>> 	if (!has_svm()) {
>>>> 		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
>>>> -		return;
>>>> +		return -EINVAL;
>>>> 	}
>>>> 	svm_data = per_cpu(svm_data, me);
>>>>
>>>> 	if (!svm_data) {
>>>> 		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
>>>> 		       me);
>>>> -		return;
>>>> +		return -EINVAL;
>>>> 	}
>>>>
>>>> 	svm_data->asid_generation = 1;
>>>> @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
>>>> 	gdt = (struct desc_struct *)gdt_descr.base;
>>>> 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
>>>>
>>>> -	rdmsrl(MSR_EFER, efer);
>>>> 	wrmsrl(MSR_EFER, efer | EFER_SVME);
>>>>
>>>> 	wrmsrl(MSR_VM_HSAVE_PA,
>>>> 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
>>>> +
>>>> +	return 0;
>>>> }
>>>>
>>>> static void svm_cpu_uninit(int cpu)
>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>> index d3213ac..c20a902 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
>>>> 	/* locked but not enabled */
>>>> }
>>>>
>>>> -static void hardware_enable(void *garbage)
>>>> +static int hardware_enable(void *garbage)
>>>> {
>>>> 	int cpu = raw_smp_processor_id();
>>>> 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
>>>> 	u64 old;
>>>>
>>>> +	if (read_cr4() & X86_CR4_VMXE)
>>>> +		return -EBUSY;
>>>> +
>>>> 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
>>>> 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
>>>> 	if ((old & (FEATURE_CONTROL_LOCKED |
>>>> @@ -1158,6 +1161,8 @@ static void hardware_enable(void *garbage)
>>>> 	asm volatile (ASM_VMX_VMXON_RAX
>>>> 		      : : "a"(&phys_addr), "m"(phys_addr)
>>>> 		      : "memory", "cc");
>>>> +
>>>> +	return 0;
>>>> }
>>>>
>>>> static void vmclear_local_vcpus(void)
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 891234b..ec16169 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -4703,9 +4703,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>>>> 	return kvm_x86_ops->vcpu_reset(vcpu);
>>>> }
>>>>
>>>> -void kvm_arch_hardware_enable(void *garbage)
>>>> +int kvm_arch_hardware_enable(void *garbage)
>>>> {
>>>> -	kvm_x86_ops->hardware_enable(garbage);
>>>> +	return kvm_x86_ops->hardware_enable(garbage);
>>>> }
>>>>
>>>> void kvm_arch_hardware_disable(void *garbage)
>>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>>> index 3e57be4..0bf9ee9 100644
>>>> --- a/include/linux/kvm_host.h
>>>> +++ b/include/linux/kvm_host.h
>>>> @@ -346,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
>>>> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
>>>>
>>>> int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
>>>> -void kvm_arch_hardware_enable(void *garbage);
>>>> +int kvm_arch_hardware_enable(void *garbage);
>>>> void kvm_arch_hardware_disable(void *garbage);
>>>> int kvm_arch_hardware_setup(void);
>>>> void kvm_arch_hardware_unsetup(void);
>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>>> index 6ce5ef3..39f0f5e 100644
>>>> --- a/virt/kvm/kvm_main.c
>>>> +++ b/virt/kvm/kvm_main.c
>>>> @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
>>>> LIST_HEAD(vm_list);
>>>>
>>>> static cpumask_var_t cpus_hardware_enabled;
>>>> +static int kvm_usage_count = 0;
>>>> +static atomic_t hardware_enable_failed;
>>>>
>>>> struct kmem_cache *kvm_vcpu_cache;
>>>> EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
>>>> @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
>>>>
>>>> static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
>>>> 			   unsigned long arg);
>>>> +static int hardware_enable_all(void);
>>>> +static void hardware_disable_all(void);
>>>>
>>>> static bool kvm_rebooting;
>>>>
>>>> @@ -326,6 +330,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
>>>>
>>>> static struct kvm *kvm_create_vm(void)
>>>> {
>>>> +	int r = 0;
>>>> 	struct kvm *kvm = kvm_arch_create_vm();
>>>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>>>> 	struct page *page;
>>>> @@ -333,6 +338,11 @@ static struct kvm *kvm_create_vm(void)
>>>>
>>>> 	if (IS_ERR(kvm))
>>>> 		goto out;
>>>> +
>>>> +	r = hardware_enable_all();
>>>> +	if (r)
>>>> +		goto out_err;
>>>> +
>>>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>>> 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>>>> 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
>>>> @@ -341,8 +351,8 @@ static struct kvm *kvm_create_vm(void)
>>>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>>>> 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>>>> 	if (!page) {
>>>> -		kfree(kvm);
>>>> -		return ERR_PTR(-ENOMEM);
>>>> +		r = -ENOMEM;
>>>> +		goto out_err;
>>>> 	}
>>>> 	kvm->coalesced_mmio_ring =
>>>> 			(struct kvm_coalesced_mmio_ring *)page_address(page);
>>>> @@ -350,15 +360,13 @@ static struct kvm *kvm_create_vm(void)
>>>>
>>>> #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>>>> 	{
>>>> -		int err;
>>>> 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>>>> -		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>>>> -		if (err) {
>>>> +		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>>>> +		if (r) {
>>>> #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>>>> 			put_page(page);
>>>> #endif
>>>> -			kfree(kvm);
>>>> -			return ERR_PTR(err);
>>>> +			goto out_err;
>>>> 		}
>>>> 	}
>>>> #endif
>>>> @@ -382,6 +390,11 @@ static struct kvm *kvm_create_vm(void)
>>>> #endif
>>>> out:
>>>> 	return kvm;
>>>> +
>>>> +out_err:
>>>> +	hardware_disable_all();
>>>> +	kfree(kvm);
>>>> +	return ERR_PTR(r);
>>>> }
>>>>
>>>> /*
>>>> @@ -440,6 +453,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>>>> 	kvm_arch_flush_shadow(kvm);
>>>> #endif
>>>> 	kvm_arch_destroy_vm(kvm);
>>>> +	hardware_disable_all();
>>>> 	mmdrop(mm);
>>>> }
>>>>
>>>> @@ -1631,11 +1645,41 @@ static struct miscdevice kvm_dev = {
>>>> static void hardware_enable(void *junk)
>>>> {
>>>> 	int cpu = raw_smp_processor_id();
>>>> +	int r;
>>>>
>>>> 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
>>>> 		return;
>>>> +
>>>> 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
>>>> -	kvm_arch_hardware_enable(NULL);
>>>> +
>>>> +	r = kvm_arch_hardware_enable(NULL);
>>>> +
>>>> +	if (r) {
>>>> +		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
>>>> +		atomic_inc(&hardware_enable_failed);
>>>> +		printk(KERN_INFO "kvm: enabling virtualization on "
>>>> +				 "CPU%d failed\n", cpu);
>>>> +	}
>>>> +}
>>>> +
>>>> +static int hardware_enable_all(void)
>>>> +{
>>>> +	int r = 0;
>>>> +
>>>> +	spin_lock(&kvm_lock);
>>>> +
>>>> +	kvm_usage_count++;
>>>> +	if (kvm_usage_count == 1) {
>>>> +		atomic_set(&hardware_enable_failed, 0);
>>>> +		on_each_cpu(hardware_enable, NULL, 1);
>>>> +
>>>> +		if (atomic_read(&hardware_enable_failed))
>>>> +			r = -EBUSY;
>>>> +	}
>>>> +
>>>> +	spin_unlock(&kvm_lock);
>>>> +
>>>> +	return r;
>>>> }
>>>
>>> I think the kvm_usage_count > 1 path should also test for
>>> hardware_enable_failed (you assume that if kvm_usage_count > 1
>>> then hardware enablement has succeeded, which is not always true).
>>
>> I don't see how kvm_usage_count could be > 1 without hardware
>> enablement having succeeded. The hardware_enable_all caller calls
>> hardware_disable_all (kvm_usage_count--) when enabling fails.
>
> But it does not hold any lock in between hardware_enable_all
> and hardware_disable_all.
>
> So it's unsafe if another kvm_create_vm call happens in between, while
> kvm_usage_count is 1?

So what we really need is a lock, so hardware_enable_all doesn't get  
called twice? Isn't that what the kvm_lock here does?

>>> Also, better move vmx.c's ept_sync_global from vmx_init to
>>> hardware_enable.
>>
>> Why? What does that do?
>
> 25.3.3.4 Guidelines for Use of the INVEPT Instruction
>
> Software can use the INVEPT instruction with the “all-context”
> INVEPT type immediately after execution of the VMXON instruction or
> immediately prior to execution of the VMXOFF instruction. Either
> prevents potentially undesired retention of information cached from
> EPT paging structures between separate uses of VMX operation.

Hmhm. I don't have EPT hardware to test things on, but I can of course  
make a blind move of the call.

Alex

^ permalink raw reply	[flat|nested] 13+ messages in thread
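The fix the thread converges on can be sketched outside the kernel. Below
is a hypothetical userspace analogue in C: a pthread mutex stands in for
kvm_lock, and the per-CPU enable/disable IPIs are stubbed out. The point
is that the failure rollback happens while the lock is still held, so no
other caller can ever observe a nonzero count with the hardware disabled:

#include <pthread.h>
#include <stdio.h>

/* Hypothetical userspace analogue of hardware_enable_all() and
 * hardware_disable_all(); none of these names are the kernel code. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int usage_count;
static int enable_failed;	/* stands in for the atomic_t */

static void enable_on_all_cpus(void) { /* may set enable_failed = 1 */ }
static void disable_on_all_cpus(void) { }

static void disable_all_nolock(void)
{
	usage_count--;
	if (!usage_count)
		disable_on_all_cpus();
}

static int enable_all(void)
{
	int r = 0;

	pthread_mutex_lock(&lock);
	usage_count++;
	if (usage_count == 1) {
		enable_failed = 0;
		enable_on_all_cpus();
		if (enable_failed) {
			/* Roll back while still holding the lock, so no
			 * concurrent caller can observe usage_count > 0
			 * with the hardware actually disabled. */
			disable_all_nolock();
			r = -1;
		}
	}
	pthread_mutex_unlock(&lock);

	return r;
}

int main(void)
{
	printf("enable_all() = %d, usage_count = %d\n",
	       enable_all(), usage_count);
	return 0;
}

This is exactly what the v2 changelog below records as "move failure
disable into locked section".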

* Re: [PATCH] Activate Virtualization On Demand
  2009-09-14 16:25       ` Alexander Graf
@ 2009-09-14 16:46         ` Marcelo Tosatti
  2009-09-14 16:54           ` Alexander Graf
  0 siblings, 1 reply; 13+ messages in thread
From: Marcelo Tosatti @ 2009-09-14 16:46 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm

On Mon, Sep 14, 2009 at 06:25:20PM +0200, Alexander Graf wrote:
>>> having succeeded. The hardware_enable_all caller calls
>>> hardware_disable_all (kvm_usage_count--) when enabling fails.
>>
>> But it does not hold any lock in between hardware_enable_all
>> and hardware_disable_all.
>>
>> So it's unsafe if another kvm_create_vm call happens in between, while
>> kvm_usage_count is 1?
>
> So what we really need is a lock, so hardware_enable_all doesn't get  
> called twice? Isn't that what the kvm_lock here does?

Either that, or check the hardware_enable_failed atomic variable even
if kvm_usage_count > 1.

>>>> Also, better move vmx.c's ept_sync_global from vmx_init to
>>>> hardware_enable.
>>>
>>> Why? What does that do?
>>
>> 25.3.3.4 Guidelines for Use of the INVEPT Instruction
>>
>> Software can use the INVEPT instruction with the “all-context”
>> INVEPT type immediately after execution of the VMXON instruction or
>> immediately prior to execution of the VMXOFF instruction. Either
>> prevents potentially undesired retention of information cached from
>> EPT paging structures between separate uses of VMX operation.
>
> Hmhm. I don't have EPT hardware to test things on, but I can of course  
> make a blind move of the call.

OK, I can do some basic testing before applying the patch.

>
> Alex

^ permalink raw reply	[flat|nested] 13+ messages in thread
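What the SDM guideline asks for is an all-context INVEPT immediately
after VMXON; in v2 this becomes an ept_sync_global() call at the end of
vmx.c's hardware_enable(). A minimal sketch of the required ordering,
with hypothetical stub names for the two primitives:

/* Hypothetical stubs for the two primitives involved; the real
 * implementations live in arch/x86/kvm/vmx.c. */
static int vmxon_on_this_cpu(void) { return 0; }	/* VMXON */
static void invept_all_context(void) { }		/* ept_sync_global() */

static int hardware_enable_sketch(void)
{
	if (vmxon_on_this_cpu())	/* enter VMX operation */
		return -1;

	/* SDM 25.3.3.4: flush EPT mappings that may have survived a
	 * previous stint in VMX operation, immediately after VMXON. */
	invept_all_context();
	return 0;
}

int main(void)
{
	return hardware_enable_sketch();
}

Accordingly, v2 drops the ept_sync_global() call from vmx_init() and
performs it in hardware_enable() instead.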

* Re: [PATCH] Activate Virtualization On Demand
  2009-09-14 16:46         ` Marcelo Tosatti
@ 2009-09-14 16:54           ` Alexander Graf
  0 siblings, 0 replies; 13+ messages in thread
From: Alexander Graf @ 2009-09-14 16:54 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm


On 14.09.2009, at 18:46, Marcelo Tosatti wrote:

> On Mon, Sep 14, 2009 at 06:25:20PM +0200, Alexander Graf wrote:
>>>> having succeeded. The hardware_enable_all caller calls
>>>> hardware_disable_all (kvm_usage_count--) when enabling fails.
>>>
>>> But it does not hold any lock in between hardware_enable_all
>>> and hardware_disable_all.
>>>
>>> So it's unsafe if another kvm_create_vm call happens in between,
>>> while kvm_usage_count is 1?
>>
>> So what we really need is a lock, so hardware_enable_all doesn't get
>> called twice? Isn't that what the kvm_lock here does?
>
> Either that, or check the hardware_enable_failed atomic variable even
> if kvm_usage_count > 1.

The patch does a lock already.

>>>>> Also, better move vmx.c's ept_sync_global from vmx_init to
>>>>> hardware_enable.
>>>>
>>>> Why? What does that do?
>>>
>>> 25.3.3.4 Guidelines for Use of the INVEPT Instruction
>>>
>>> Software can use the INVEPT instruction with the “all-context”
>>> INVEPT type immediately after execution of the VMXON instruction or
>>> immediately prior to execution of the VMXOFF instruction. Either
>>> prevents potentially undesired retention of information cached from
>>> EPT paging structures between separate uses of VMX operation.
>>
>> Hmhm. I don't have EPT hardware to test things on, but I can of
>> course make a blind move of the call.
>
> OK, I can do some basic testing before applying the patch.

Great :-)

Alex


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] Activate Virtualization On Demand
@ 2009-09-15  9:37 Alexander Graf
  0 siblings, 0 replies; 13+ messages in thread
From: Alexander Graf @ 2009-09-15  9:37 UTC (permalink / raw)
  To: kvm; +Cc: mtosatti

X86 CPUs need to have some magic happening to enable the virtualization
extensions on them. This magic can have unpleasant consequences for
users, like blocking other VMMs from working (vmx) or using invalid TLB
entries (svm).

Currently KVM activates virtualization when the respective kernel module
is loaded. This blocks us from autoloading KVM modules without breaking
other VMMs.

To circumvent this problem at least a bit, this patch introduces
on-demand activation of virtualization. This means that virtualization
is instead enabled on creation of the first virtual machine and
disabled on destruction of the last one.

Using this, KVM can easily be autoloaded while keeping other
hypervisors usable.

Signed-off-by: Alexander Graf <agraf@suse.de>

--

I've tested the following:

  - shutdown
  - suspend / resume to RAM
  - suspend / resume to DISK
  - running VirtualBox while kvm module is loaded

v1 -> v2

  - move failure disable into locked section
  - move vmx's ept_sync_global from init to hardware_enable
---
 arch/ia64/kvm/kvm-ia64.c        |    8 ++-
 arch/powerpc/kvm/powerpc.c      |    3 +-
 arch/s390/kvm/kvm-s390.c        |    3 +-
 arch/x86/include/asm/kvm_host.h |    2 +-
 arch/x86/kvm/svm.c              |   13 ++++--
 arch/x86/kvm/vmx.c              |   11 +++-
 arch/x86/kvm/x86.c              |    4 +-
 include/linux/kvm_host.h        |    2 +-
 virt/kvm/kvm_main.c             |   90 +++++++++++++++++++++++++++++++++-----
 9 files changed, 108 insertions(+), 28 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f6471c8..5fdeec5 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	long  status;
 	long  tmp_base;
@@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
 	local_irq_restore(saved_psr);
 	if (slot < 0)
-		return;
+		return -EINVAL;
 
 	spin_lock(&vp_lock);
 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
@@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
 	if (status != 0) {
 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return ;
+		return -EINVAL;
 	}
 
 	if (!kvm_vsa_base) {
@@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
 	}
 	spin_unlock(&vp_lock);
 	ia64_ptr_entry(0x3, slot);
+
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 95af622..5902bbc 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 00e2ce8..5445058 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static unsigned long long *facilities;
 
 /* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	/* every s390 is virtualization enabled ;-) */
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6046e6f..b17886f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -462,7 +462,7 @@ struct descriptor_table {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	void (*hardware_enable)(void *dummy);      /* __init */
+	int (*hardware_enable)(void *dummy);
 	void (*hardware_disable)(void *dummy);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a5f90c7..2f3a388 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
 	struct svm_cpu_data *svm_data;
@@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage)
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
 		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		return -EINVAL;
 	}
 	svm_data = per_cpu(svm_data, me);
 
 	if (!svm_data) {
 		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
 	svm_data->asid_generation = 1;
@@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage)
 	gdt = (struct desc_struct *)gdt_descr.base;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
 	wrmsrl(MSR_VM_HSAVE_PA,
 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d3213ac..3fe0d42 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void)
 	/* locked but not enabled */
 }
 
-static void hardware_enable(void *garbage)
+static int hardware_enable(void *garbage)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	u64 old;
 
+	if (read_cr4() & X86_CR4_VMXE)
+		return -EBUSY;
+
 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 	if ((old & (FEATURE_CONTROL_LOCKED |
@@ -1158,6 +1161,10 @@ static void hardware_enable(void *garbage)
 	asm volatile (ASM_VMX_VMXON_RAX
 		      : : "a"(&phys_addr), "m"(phys_addr)
 		      : "memory", "cc");
+
+	ept_sync_global();
+
+	return 0;
 }
 
 static void vmclear_local_vcpus(void)
@@ -4040,8 +4047,6 @@ static int __init vmx_init(void)
 	if (bypass_guest_pf)
 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
 
-	ept_sync_global();
-
 	return 0;
 
 out3:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 891234b..ec16169 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4703,9 +4703,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
-	kvm_x86_ops->hardware_enable(garbage);
+	return kvm_x86_ops->hardware_enable(garbage);
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3e57be4..0bf9ee9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -346,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
-void kvm_arch_hardware_enable(void *garbage);
+int kvm_arch_hardware_enable(void *garbage);
 void kvm_arch_hardware_disable(void *garbage);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6ce5ef3..e27b7a9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
+static int kvm_usage_count = 0;
+static atomic_t hardware_enable_failed;
 
 struct kmem_cache *kvm_vcpu_cache;
 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
+static int hardware_enable_all(void);
+static void hardware_disable_all(void);
 
 static bool kvm_rebooting;
 
@@ -326,6 +330,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 
 static struct kvm *kvm_create_vm(void)
 {
+	int r = 0;
 	struct kvm *kvm = kvm_arch_create_vm();
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	struct page *page;
@@ -333,6 +338,11 @@ static struct kvm *kvm_create_vm(void)
 
 	if (IS_ERR(kvm))
 		goto out;
+
+	r = hardware_enable_all();
+	if (r)
+		goto out_err_nodisable;
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
@@ -341,8 +351,8 @@ static struct kvm *kvm_create_vm(void)
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
-		kfree(kvm);
-		return ERR_PTR(-ENOMEM);
+		r = -ENOMEM;
+		goto out_err;
 	}
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -350,15 +360,13 @@ static struct kvm *kvm_create_vm(void)
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	{
-		int err;
 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (err) {
+		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+		if (r) {
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 			put_page(page);
 #endif
-			kfree(kvm);
-			return ERR_PTR(err);
+			goto out_err;
 		}
 	}
 #endif
@@ -382,6 +390,12 @@ static struct kvm *kvm_create_vm(void)
 #endif
 out:
 	return kvm;
+
+out_err:
+	hardware_disable_all();
+out_err_nodisable:
+	kfree(kvm);
+	return ERR_PTR(r);
 }
 
 /*
@@ -440,6 +454,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	hardware_disable_all();
 	mmdrop(mm);
 }
 
@@ -1631,11 +1646,21 @@ static struct miscdevice kvm_dev = {
 static void hardware_enable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
+	int r;
 
 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
+
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_enable(NULL);
+
+	r = kvm_arch_hardware_enable(NULL);
+
+	if (r) {
+		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
+		atomic_inc(&hardware_enable_failed);
+		printk(KERN_INFO "kvm: enabling virtualization on "
+				 "CPU%d failed\n", cpu);
+	}
 }
 
 static void hardware_disable(void *junk)
@@ -1648,11 +1673,52 @@ static void hardware_disable(void *junk)
 	kvm_arch_hardware_disable(NULL);
 }
 
+static void hardware_disable_all_nolock(void)
+{
+	BUG_ON(!kvm_usage_count);
+
+	kvm_usage_count--;
+	if (!kvm_usage_count)
+		on_each_cpu(hardware_disable, NULL, 1);
+}
+
+static void hardware_disable_all(void)
+{
+	spin_lock(&kvm_lock);
+	hardware_disable_all_nolock();
+	spin_unlock(&kvm_lock);
+}
+
+static int hardware_enable_all(void)
+{
+	int r = 0;
+
+	spin_lock(&kvm_lock);
+
+	kvm_usage_count++;
+	if (kvm_usage_count == 1) {
+		atomic_set(&hardware_enable_failed, 0);
+		on_each_cpu(hardware_enable, NULL, 1);
+
+		if (atomic_read(&hardware_enable_failed)) {
+			hardware_disable_all_nolock();
+			r = -EBUSY;
+		}
+	}
+
+	spin_unlock(&kvm_lock);
+
+	return r;
+}
+
 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 			   void *v)
 {
 	int cpu = (long)v;
 
+	if (!kvm_usage_count)
+		return NOTIFY_OK;
+
 	val &= ~CPU_TASKS_FROZEN;
 	switch (val) {
 	case CPU_DYING:
@@ -1855,13 +1921,15 @@ static void kvm_exit_debug(void)
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 {
-	hardware_disable(NULL);
+	if (kvm_usage_count)
+		hardware_disable(NULL);
 	return 0;
 }
 
 static int kvm_resume(struct sys_device *dev)
 {
-	hardware_enable(NULL);
+	if (kvm_usage_count)
+		hardware_enable(NULL);
 	return 0;
 }
 
@@ -1938,7 +2006,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 			goto out_free_1;
 	}
 
-	on_each_cpu(hardware_enable, NULL, 1);
 	r = register_cpu_notifier(&kvm_cpu_notifier);
 	if (r)
 		goto out_free_2;
@@ -1986,7 +2053,6 @@ out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
 out_free_0a:
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 13+ messages in thread
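With this applied, the moment virtualization gets switched on can be
observed from userspace: it is the first KVM_CREATE_VM ioctl, not module
load. A minimal sketch against the standard /dev/kvm ABI (error handling
trimmed; the comments describe the behavior this patch introduces):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm, vm;

	kvm = open("/dev/kvm", O_RDWR);	/* hardware still untouched here */
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	vm = ioctl(kvm, KVM_CREATE_VM, 0);	/* first VM: hardware_enable_all() */
	if (vm < 0) {
		perror("KVM_CREATE_VM");	/* -EBUSY if another VMM holds VMX/SVM */
		return 1;
	}

	close(vm);	/* last VM gone: hardware_disable_all() */
	close(kvm);
	return 0;
}

If another hypervisor already owns the virtualization extensions, the
ioctl now fails with -EBUSY instead of the module grabbing the hardware
at load time.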

* Re: [PATCH] Activate Virtualization On Demand
  2009-03-17 15:48   ` Alexander Graf
@ 2009-03-18  6:43     ` Avi Kivity
  0 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2009-03-18  6:43 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm

Alexander Graf wrote:
>>
>> Please make sure cpu hotplug/hotunplug (and thus suspend/resume)
>> still work.
>
> Make sure as in test?

I use this:

#!/usr/bin/python

import commands, time, random

processors = int(commands.getoutput('getconf _NPROCESSORS_CONF'))

def fname(cpu):
    # sysfs knob that controls whether a CPU is online
    return '/sys/devices/system/cpu/cpu%s/online' % (cpu,)

def get_state(cpu):
    return int(file(fname(cpu)).read())

def set_state(cpu, state):
    file(fname(cpu), 'w').write(str(state))

try:
    # toggle a random CPU (never CPU 0) once a second
    while True:
        cpu = random.randint(1, processors - 1)
        state = 1 - get_state(cpu)
        set_state(cpu, state)
        time.sleep(1)
except KeyboardInterrupt:
    # on ^C, bring every CPU back online
    for cpu in range(1, processors):
        if not get_state(cpu):
            set_state(cpu, 1)


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 13+ messages in thread
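For systems without Python, a rough C equivalent of the same toggle loop
(a hypothetical sketch: it assumes the usual
/sys/devices/system/cpu/cpuN/online layout, needs root, and unlike the
script above does not re-online CPUs on exit):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

/* Read (new_state < 0) or write (new_state 0/1) a CPU's online file. */
static int cpu_online(int cpu, int *state, int new_state)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, new_state < 0 ? "r" : "w");
	if (!f)
		return -1;
	if (new_state < 0)
		fscanf(f, "%d", state);
	else
		fprintf(f, "%d", new_state);
	fclose(f);
	return 0;
}

int main(void)
{
	long cpus = sysconf(_SC_NPROCESSORS_CONF);
	int cpu, state;

	if (cpus < 2)
		return 1;	/* nothing to toggle on a UP machine */

	srand(time(NULL));
	for (;;) {	/* stop with ^C, then re-online CPUs by hand */
		cpu = 1 + rand() % (cpus - 1);
		if (cpu_online(cpu, &state, -1) == 0)
			cpu_online(cpu, NULL, !state);
		sleep(1);
	}
}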

* Re: [PATCH] Activate Virtualization On Demand
  2009-03-17 12:04 ` Avi Kivity
@ 2009-03-17 15:48   ` Alexander Graf
  2009-03-18  6:43     ` Avi Kivity
  0 siblings, 1 reply; 13+ messages in thread
From: Alexander Graf @ 2009-03-17 15:48 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm


On 17.03.2009, at 13:04, Avi Kivity <avi@redhat.com> wrote:

> Alexander Graf wrote:
>> X86 CPUs need to have some magic happening to enable the
>> virtualization extensions on them. This magic can have unpleasant
>> consequences for users, like blocking other VMMs from working (vmx)
>> or using invalid TLB entries (svm).
>>
>> Currently KVM activates virtualization when the respective kernel
>> module is loaded. This blocks us from autoloading KVM modules
>> without breaking other VMMs.
>>
>> To circumvent this problem at least a bit, this patch introduces
>> on-demand activation of virtualization. This means that
>> virtualization is instead enabled on creation of the first virtual
>> machine and disabled on destruction of the last one.
>>
>> Using this, KVM can easily be autoloaded while keeping other
>> hypervisors usable.
>>
>
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 68b217e..7c40743 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -65,6 +65,8 @@ DEFINE_SPINLOCK(kvm_lock);
>> LIST_HEAD(vm_list);
>>  static cpumask_var_t cpus_hardware_enabled;
>> +static int kvm_usage_count = 0;
>> +static DEFINE_SPINLOCK(kvm_usage_lock);
>>
>
> Please use kvm_lock for this.

Looks good. Will do :-).

>
>
>> @@ -2327,14 +2341,40 @@ static struct miscdevice kvm_dev = {
>>    &kvm_chardev_ops,
>> };
>> -static void hardware_enable(void *junk)
>> +static void hardware_enable(void *_r)
>> {
>>    int cpu = raw_smp_processor_id();
>> +    int r;
>> +
>> +    /* If enabling a previous CPU failed already, let's not continue */
>> +    if (_r && *((int*)_r))
>> +        return;
>>        if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
>>        return;
>> +    r = kvm_arch_hardware_enable(NULL);
>> +    if (_r)
>> +        *((int*)_r) = r;
>>
>
> Racy.  If one cpu succeeds and another fails, the successful one  
> could overwrite the failing one's result.
>
> While the race will never happen (start two VMMs simultaneously), it
> will cause an endless stream of complaints.  Let's use an atomic_t
> incremented on each failure.
>
> Oh, and it can be global since we're inside a lock, so some of the  
> changes to add a return value become unnecessary.

Right, that probably cleans up things a bit.

>
>
>> +static void hardware_disable_all(void)
>> +{
>> +    if (!kvm_usage_count)
>> +        return;
>>
>
> Can this happen?

It should not. Better make it a BUG(...)?

>
>
>> +
>> +    spin_lock(&kvm_usage_lock);
>> +    kvm_usage_count--;
>> +    if (!kvm_usage_count)
>> +        on_each_cpu(hardware_disable, NULL, 1);
>> +    spin_unlock(&kvm_usage_lock);
>> +}
>> +
>>
>
> Please make sure cpu hotplug/hotunplug (and thus suspend/resume)
> still work.

Make sure as in test?

Alex

>
>
> -- 
> error compiling committee.c: too
> many arguments to function
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Activate Virtualization On Demand
  2009-03-17  8:47 Alexander Graf
@ 2009-03-17 12:04 ` Avi Kivity
  2009-03-17 15:48   ` Alexander Graf
  0 siblings, 1 reply; 13+ messages in thread
From: Avi Kivity @ 2009-03-17 12:04 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm

Alexander Graf wrote:
> X86 CPUs need to have some magic happening to enable the virtualization
> extensions on them. This magic can have unpleasant consequences for
> users, like blocking other VMMs from working (vmx) or using invalid TLB
> entries (svm).
>
> Currently KVM activates virtualization when the respective kernel module
> is loaded. This blocks us from autoloading KVM modules without breaking
> other VMMs.
>
> To circumvent this problem at least a bit, this patch introduces
> on-demand activation of virtualization. This means that virtualization
> is instead enabled on creation of the first virtual machine and
> disabled on destruction of the last one.
>
> Using this, KVM can easily be autoloaded while keeping other
> hypervisors usable.
>   

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 68b217e..7c40743 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -65,6 +65,8 @@ DEFINE_SPINLOCK(kvm_lock);
>  LIST_HEAD(vm_list);
>  
>  static cpumask_var_t cpus_hardware_enabled;
> +static int kvm_usage_count = 0;
> +static DEFINE_SPINLOCK(kvm_usage_lock);
>   

Please use kvm_lock for this.

>  
> @@ -2327,14 +2341,40 @@ static struct miscdevice kvm_dev = {
>  	&kvm_chardev_ops,
>  };
>  
> -static void hardware_enable(void *junk)
> +static void hardware_enable(void *_r)
>  {
>  	int cpu = raw_smp_processor_id();
> +	int r;
> +
> +	/* If enabling a previous CPU failed already, let's not continue */
> +	if (_r && *((int*)_r))
> +		return;
>   
>  
>  	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
>  		return;
> +	r = kvm_arch_hardware_enable(NULL);
> +	if (_r)
> +		*((int*)_r) = r;
>   

Racy.  If one cpu succeeds and another fails, the successful one could 
overwrite the failing one's result.

While the race will never happen (start two VMMs simultaneously), it will
cause an endless stream of complaints.  Let's use an atomic_t
incremented on each failure.

Oh, and it can be global since we're inside a lock, so some of the 
changes to add a return value become unnecessary.

>  
> +static void hardware_disable_all(void)
> +{
> +	if (!kvm_usage_count)
> +		return;
>   

Can this happen?

> +
> +	spin_lock(&kvm_usage_lock);
> +	kvm_usage_count--;
> +	if (!kvm_usage_count)
> +		on_each_cpu(hardware_disable, NULL, 1);
> +	spin_unlock(&kvm_usage_lock);
> +}
> +
>   

Please make sure cpu hotplug/hotunplug (and thus suspend/resume) still work.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 13+ messages in thread
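The atomic_t suggestion deserves spelling out: with a shared result
slot, a success that lands after a failure overwrites it, while a
counter that is only ever incremented on failure can never lose one. A
minimal C11 userspace sketch of the two reporting styles (hypothetical
names, not the kernel code):

#include <stdatomic.h>

static atomic_int hardware_enable_failed;

/* Racy variant: every CPU writes its result into one shared slot, so a
 * success (r == 0) arriving after a failure silently overwrites it. */
static void report_shared(int *shared_r, int r)
{
	if (shared_r)
		*shared_r = r;
}

/* Race-free variant: successes never touch the counter, so one failing
 * CPU is enough to leave it nonzero. */
static void report_counted(int r)
{
	if (r)
		atomic_fetch_add(&hardware_enable_failed, 1);
}

int main(void)
{
	int shared_r = 0;

	report_shared(&shared_r, -1);	/* one CPU fails...              */
	report_shared(&shared_r, 0);	/* ...another succeeds: lost     */

	report_counted(-1);
	report_counted(0);		/* the counter keeps the failure */

	return atomic_load(&hardware_enable_failed) ? 0 : 1;
}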

* [PATCH] Activate Virtualization On Demand
@ 2009-03-17  8:47 Alexander Graf
  2009-03-17 12:04 ` Avi Kivity
  0 siblings, 1 reply; 13+ messages in thread
From: Alexander Graf @ 2009-03-17  8:47 UTC (permalink / raw)
  To: kvm

X86 CPUs need to have some magic happening to enable the virtualization
extensions on them. This magic can have unpleasant consequences for
users, like blocking other VMMs from working (vmx) or using invalid TLB
entries (svm).

Currently KVM activates virtualization when the respective kernel module
is loaded. This blocks us from autoloading KVM modules without breaking
other VMMs.

To circumvent this problem at least a bit, this patch introduces
on-demand activation of virtualization. This means that virtualization
is instead enabled on creation of the first virtual machine and
disabled on destruction of the last one.

Using this, KVM can easily be autoloaded while keeping other
hypervisors usable.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/ia64/kvm/kvm-ia64.c        |    8 +++--
 arch/powerpc/kvm/powerpc.c      |    3 ++-
 arch/s390/kvm/kvm-s390.c        |    3 ++-
 arch/x86/include/asm/kvm_host.h |    2 +-
 arch/x86/kvm/svm.c              |   13 +++++--
 arch/x86/kvm/vmx.c              |    7 +++-
 arch/x86/kvm/x86.c              |    4 +-
 include/linux/kvm_host.h        |    2 +-
 virt/kvm/kvm_main.c             |   72 +++++++++++++++++++++++++++++++++------
 9 files changed, 89 insertions(+), 25 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index c25347f..df6bab1 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	long  status;
 	long  tmp_base;
@@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
 	local_irq_restore(saved_psr);
 	if (slot < 0)
-		return;
+		return -EINVAL;
 
 	spin_lock(&vp_lock);
 	status = ia64_pal_vp_init_env(kvm_vsa_base ?
@@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
 	if (status != 0) {
 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return ;
+		return -EINVAL;
 	}
 
 	if (!kvm_vsa_base) {
@@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
 	}
 	spin_unlock(&vp_lock);
 	ia64_ptr_entry(0x3, slot);
+
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9057335..6558ab7 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -80,7 +80,8 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
+	return 0;
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index cbfe91e..a14e676 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -70,7 +70,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 
 /* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
 	/* every s390 is virtualization enabled ;-) */
+	return 0;
 }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4627627..72d5075 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -463,7 +463,7 @@ struct descriptor_table {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	void (*hardware_enable)(void *dummy);      /* __init */
+	int (*hardware_enable)(void *dummy);
 	void (*hardware_disable)(void *dummy);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1821c20..06aeb7f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -262,7 +262,7 @@ static void svm_hardware_disable(void *garbage)
 	cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
 	struct svm_cpu_data *svm_data;
@@ -271,16 +271,20 @@ static void svm_hardware_enable(void *garbage)
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
+	rdmsrl(MSR_EFER, efer);
+	if (efer & EFER_SVME)
+		return -EBUSY;
+
 	if (!has_svm()) {
 		printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
-		return;
+		return -EINVAL;
 	}
 	svm_data = per_cpu(svm_data, me);
 
 	if (!svm_data) {
 		printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
 		       me);
-		return;
+		return -EINVAL;
 	}
 
 	svm_data->asid_generation = 1;
@@ -291,11 +295,12 @@ static void svm_hardware_enable(void *garbage)
 	gdt = (struct desc_struct *)gdt_descr.address;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
-	rdmsrl(MSR_EFER, efer);
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
 
 	wrmsrl(MSR_VM_HSAVE_PA,
 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+
+	return 0;
 }
 
 static void svm_cpu_uninit(int cpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5cf28df..b40ca0d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1068,12 +1068,15 @@ static __init int vmx_disabled_by_bios(void)
 	/* locked but not enabled */
 }
 
-static void hardware_enable(void *garbage)
+static int hardware_enable(void *garbage)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	u64 old;
 
+	if (read_cr4() & X86_CR4_VMXE)
+		return -EBUSY;
+
 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 	if ((old & (FEATURE_CONTROL_LOCKED |
@@ -1088,6 +1091,8 @@ static void hardware_enable(void *garbage)
 	asm volatile (ASM_VMX_VMXON_RAX
 		      : : "a"(&phys_addr), "m"(phys_addr)
 		      : "memory", "cc");
+
+	return 0;
 }
 
 static void vmclear_local_vcpus(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2ea8262..1fd716d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4220,9 +4220,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
-	kvm_x86_ops->hardware_enable(garbage);
+	return kvm_x86_ops->hardware_enable(garbage);
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 11eb702..7678995 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -292,7 +292,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
-void kvm_arch_hardware_enable(void *garbage);
+int kvm_arch_hardware_enable(void *garbage);
 void kvm_arch_hardware_disable(void *garbage);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 68b217e..7c40743 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -65,6 +65,8 @@ DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
+static int kvm_usage_count = 0;
+static DEFINE_SPINLOCK(kvm_usage_lock);
 
 struct kmem_cache *kvm_vcpu_cache;
 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -75,6 +77,8 @@ struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
+static int hardware_enable_all(void);
+static void hardware_disable_all(void);
 
 static bool kvm_rebooting;
 
@@ -931,6 +935,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 
 static struct kvm *kvm_create_vm(void)
 {
+	int r = 0;
 	struct kvm *kvm = kvm_arch_create_vm();
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	struct page *page;
@@ -938,6 +943,11 @@ static struct kvm *kvm_create_vm(void)
 
 	if (IS_ERR(kvm))
 		goto out;
+
+	r = hardware_enable_all();
+	if (r)
+		goto out_err;
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -946,8 +956,8 @@ static struct kvm *kvm_create_vm(void)
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
-		kfree(kvm);
-		return ERR_PTR(-ENOMEM);
+		r = -ENOMEM;
+		goto out_err;
 	}
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -955,15 +965,13 @@ static struct kvm *kvm_create_vm(void)
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	{
-		int err;
 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (err) {
+		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+		if (r) {
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 			put_page(page);
 #endif
-			kfree(kvm);
-			return ERR_PTR(err);
+			goto out_err;
 		}
 	}
 #endif
@@ -984,6 +992,11 @@ static struct kvm *kvm_create_vm(void)
 #endif
 out:
 	return kvm;
+
+out_err:
+	hardware_disable_all();
+	kfree(kvm);
+	return ERR_PTR(r);
 }
 
 /*
@@ -1034,6 +1047,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	hardware_disable_all();
 	mmdrop(mm);
 }
 
@@ -2327,14 +2341,40 @@ static struct miscdevice kvm_dev = {
 	&kvm_chardev_ops,
 };
 
-static void hardware_enable(void *junk)
+static void hardware_enable(void *_r)
 {
 	int cpu = raw_smp_processor_id();
+	int r;
+
+	/* If enabling a previous CPU failed already, let's not continue */
+	if (_r && *((int*)_r))
+		return;
 
 	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
+	r = kvm_arch_hardware_enable(NULL);
+	if (_r)
+		*((int*)_r) = r;
+	if (r) {
+		printk(KERN_INFO "kvm: enabling virtualization on "
+				 "CPU%d failed\n", cpu);
+		return;
+	}
+
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_enable(NULL);
+}
+
+static int hardware_enable_all(void)
+{
+	int r = 0;
+
+	spin_lock(&kvm_usage_lock);
+	kvm_usage_count++;
+	if (kvm_usage_count == 1)
+		on_each_cpu(hardware_enable, &r, 1);
+	spin_unlock(&kvm_usage_lock);
+
+	return r;
 }
 
 static void hardware_disable(void *junk)
@@ -2347,6 +2387,18 @@ static void hardware_disable(void *junk)
 	kvm_arch_hardware_disable(NULL);
 }
 
+static void hardware_disable_all(void)
+{
+	if (!kvm_usage_count)
+		return;
+
+	spin_lock(&kvm_usage_lock);
+	kvm_usage_count--;
+	if (!kvm_usage_count)
+		on_each_cpu(hardware_disable, NULL, 1);
+	spin_unlock(&kvm_usage_lock);
+}
+
 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 			   void *v)
 {
@@ -2594,7 +2646,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 			goto out_free_1;
 	}
 
-	on_each_cpu(hardware_enable, NULL, 1);
 	r = register_cpu_notifier(&kvm_cpu_notifier);
 	if (r)
 		goto out_free_2;
@@ -2642,7 +2693,6 @@ out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
 out_free_0a:
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-09-15  9:36 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-09 14:18 [PATCH] Activate Virtualization On Demand Alexander Graf
2009-09-14  5:05 ` Avi Kivity
2009-09-14 13:23 ` Marcelo Tosatti
2009-09-14 15:52   ` Alexander Graf
2009-09-14 16:14     ` Marcelo Tosatti
2009-09-14 16:25       ` Alexander Graf
2009-09-14 16:46         ` Marcelo Tosatti
2009-09-14 16:54           ` Alexander Graf
  -- strict thread matches above, loose matches on Subject: below --
2009-09-15  9:37 Alexander Graf
2009-03-17  8:47 Alexander Graf
2009-03-17 12:04 ` Avi Kivity
2009-03-17 15:48   ` Alexander Graf
2009-03-18  6:43     ` Avi Kivity
