All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
@ 2021-11-22 17:58 Vitaly Kuznetsov
  2021-11-22 17:58 ` [PATCH 1/2] KVM: selftests: Avoid KVM_SET_CPUID2 after KVM_RUN in hyperv_features test Vitaly Kuznetsov
  2021-11-22 17:58 ` [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN Vitaly Kuznetsov
  0 siblings, 2 replies; 36+ messages in thread
From: Vitaly Kuznetsov @ 2021-11-22 17:58 UTC (permalink / raw)
  To: kvm, Paolo Bonzini
  Cc: Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Commit 63f5a1909f9e ("KVM: x86: Alert userspace that KVM_SET_CPUID{,2}
after KVM_RUN is broken") officially deprecated KVM_SET_CPUID{,2} ioctls
after first successful KVM_RUN and promissed to make this sequence forbiden
in 5.16. TO fulfil the promise 'hyperv_features' selftest needs to be fixed
first.

Vitaly Kuznetsov (2):
  KVM: selftests: Avoid KVM_SET_CPUID2 after KVM_RUN in hyperv_features
    test
  KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN

 arch/x86/kvm/mmu/mmu.c                        |  20 +--
 arch/x86/kvm/x86.c                            |  27 ++++
 .../selftests/kvm/x86_64/hyperv_features.c    | 140 +++++++++---------
 3 files changed, 101 insertions(+), 86 deletions(-)

-- 
2.33.1


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 1/2] KVM: selftests: Avoid KVM_SET_CPUID2 after KVM_RUN in hyperv_features test
  2021-11-22 17:58 [PATCH 0/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN Vitaly Kuznetsov
@ 2021-11-22 17:58 ` Vitaly Kuznetsov
  2021-11-22 17:58 ` [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN Vitaly Kuznetsov
  1 sibling, 0 replies; 36+ messages in thread
From: Vitaly Kuznetsov @ 2021-11-22 17:58 UTC (permalink / raw)
  To: kvm, Paolo Bonzini
  Cc: Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

hyperv_features's sole purpose is to test access to various Hyper-V MSRs
and hypercalls with different CPUID data. As KVM_SET_CPUID2 after KVM_RUN
is deprecated and soon-to-be forbidden, avoid it by re-creating test VM
for each sub-test.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 .../selftests/kvm/x86_64/hyperv_features.c    | 140 +++++++++---------
 1 file changed, 71 insertions(+), 69 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 91d88aaa9899..672915ce73d8 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
 	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
 }
 
-static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
-				   struct kvm_cpuid2 *best)
+static void guest_test_msrs_access(void)
 {
 	struct kvm_run *run;
+	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0, r;
 	struct kvm_cpuid_entry2 feat = {
@@ -180,11 +180,34 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
 	struct kvm_cpuid_entry2 dbg = {
 		.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
 	};
-	struct kvm_enable_cap cap = {0};
-
-	run = vcpu_state(vm, VCPU_ID);
+	struct kvm_cpuid2 *best;
+	vm_vaddr_t msr_gva;
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+		.args = {1}
+	};
+	struct msr_data *msr;
 
 	while (true) {
+		vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+		msr_gva = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+		msr = addr_gva2hva(vm, msr_gva);
+
+		vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+		vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+		vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+		best = kvm_get_supported_hv_cpuid();
+
+		vm_init_descriptor_tables(vm);
+		vcpu_init_descriptor_tables(vm, VCPU_ID);
+		vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+		run = vcpu_state(vm, VCPU_ID);
+
 		switch (stage) {
 		case 0:
 			/*
@@ -315,6 +338,7 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
 			 * capability enabled and guest visible CPUID bit unset.
 			 */
 			cap.cap = KVM_CAP_HYPERV_SYNIC2;
+			cap.args[0] = 0;
 			vcpu_enable_cap(vm, VCPU_ID, &cap);
 			break;
 		case 22:
@@ -461,9 +485,9 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_SYNC:
-			TEST_ASSERT(uc.args[1] == stage,
-				    "Unexpected stage: %ld (%d expected)\n",
-				    uc.args[1], stage);
+			TEST_ASSERT(uc.args[1] == 0,
+				    "Unexpected stage: %ld (0 expected)\n",
+				    uc.args[1]);
 			break;
 		case UCALL_ABORT:
 			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -474,13 +498,14 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
 		}
 
 		stage++;
+		kvm_vm_free(vm);
 	}
 }
 
-static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
-				     void *input, void *output, struct kvm_cpuid2 *best)
+static void guest_test_hcalls_access(void)
 {
 	struct kvm_run *run;
+	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0, r;
 	struct kvm_cpuid_entry2 feat = {
@@ -493,10 +518,38 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
 	struct kvm_cpuid_entry2 dbg = {
 		.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
 	};
-
-	run = vcpu_state(vm, VCPU_ID);
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+		.args = {1}
+	};
+	vm_vaddr_t hcall_page, hcall_params;
+	struct hcall_data *hcall;
+	struct kvm_cpuid2 *best;
 
 	while (true) {
+		vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+		vm_init_descriptor_tables(vm);
+		vcpu_init_descriptor_tables(vm, VCPU_ID);
+		vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+		/* Hypercall input/output */
+		hcall_page = vm_vaddr_alloc_pages(vm, 2);
+		hcall = addr_gva2hva(vm, hcall_page);
+		memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+		hcall_params = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+		vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+		vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+		vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+		best = kvm_get_supported_hv_cpuid();
+
+		run = vcpu_state(vm, VCPU_ID);
+
 		switch (stage) {
 		case 0:
 			hcall->control = 0xdeadbeef;
@@ -606,9 +659,9 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_SYNC:
-			TEST_ASSERT(uc.args[1] == stage,
-				    "Unexpected stage: %ld (%d expected)\n",
-				    uc.args[1], stage);
+			TEST_ASSERT(uc.args[1] == 0,
+				    "Unexpected stage: %ld (0 expected)\n",
+				    uc.args[1]);
 			break;
 		case UCALL_ABORT:
 			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -619,66 +672,15 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
 		}
 
 		stage++;
+		kvm_vm_free(vm);
 	}
 }
 
 int main(void)
 {
-	struct kvm_cpuid2 *best;
-	struct kvm_vm *vm;
-	vm_vaddr_t msr_gva, hcall_page, hcall_params;
-	struct kvm_enable_cap cap = {
-		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
-		.args = {1}
-	};
-
-	/* Test MSRs */
-	vm = vm_create_default(VCPU_ID, 0, guest_msr);
-
-	msr_gva = vm_vaddr_alloc_page(vm);
-	memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
-	vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
-	vcpu_enable_cap(vm, VCPU_ID, &cap);
-
-	vcpu_set_hv_cpuid(vm, VCPU_ID);
-
-	best = kvm_get_supported_hv_cpuid();
-
-	vm_init_descriptor_tables(vm);
-	vcpu_init_descriptor_tables(vm, VCPU_ID);
-	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
 	pr_info("Testing access to Hyper-V specific MSRs\n");
-	guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
-			       best);
-	kvm_vm_free(vm);
-
-	/* Test hypercalls */
-	vm = vm_create_default(VCPU_ID, 0, guest_hcall);
-
-	vm_init_descriptor_tables(vm);
-	vcpu_init_descriptor_tables(vm, VCPU_ID);
-	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-
-	/* Hypercall input/output */
-	hcall_page = vm_vaddr_alloc_pages(vm, 2);
-	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
-	hcall_params = vm_vaddr_alloc_page(vm);
-	memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
-
-	vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
-	vcpu_enable_cap(vm, VCPU_ID, &cap);
-
-	vcpu_set_hv_cpuid(vm, VCPU_ID);
-
-	best = kvm_get_supported_hv_cpuid();
+	guest_test_msrs_access();
 
 	pr_info("Testing access to Hyper-V hypercalls\n");
-	guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
-				 addr_gva2hva(vm, hcall_page),
-				 addr_gva2hva(vm, hcall_page) + getpagesize(),
-				 best);
-
-	kvm_vm_free(vm);
+	guest_test_hcalls_access();
 }
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2021-11-22 17:58 [PATCH 0/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN Vitaly Kuznetsov
  2021-11-22 17:58 ` [PATCH 1/2] KVM: selftests: Avoid KVM_SET_CPUID2 after KVM_RUN in hyperv_features test Vitaly Kuznetsov
@ 2021-11-22 17:58 ` Vitaly Kuznetsov
  2021-11-26 12:20   ` Paolo Bonzini
  1 sibling, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2021-11-22 17:58 UTC (permalink / raw)
  To: kvm, Paolo Bonzini
  Cc: Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Commit 63f5a1909f9e ("KVM: x86: Alert userspace that KVM_SET_CPUID{,2}
after KVM_RUN is broken") officially deprecated KVM_SET_CPUID{,2} ioctls
after first successful KVM_RUN and promissed to make this sequence forbiden
in 5.16. It's time to fulfil the promise.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 20 +++-----------------
 arch/x86/kvm/x86.c     | 27 +++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3be9beea838d..669e86688cbf 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5032,24 +5032,10 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	kvm_mmu_reset_context(vcpu);
 
 	/*
-	 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
-	 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
-	 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
-	 * faults due to reusing SPs/SPTEs.  Alert userspace, but otherwise
-	 * sweep the problem under the rug.
-	 *
-	 * KVM's horrific CPUID ABI makes the problem all but impossible to
-	 * solve, as correctly handling multiple vCPU models (with respect to
-	 * paging and physical address properties) in a single VM would require
-	 * tracking all relevant CPUID information in kvm_mmu_page_role.  That
-	 * is very undesirable as it would double the memory requirements for
-	 * gfn_track (see struct kvm_mmu_page_role comments), and in practice
-	 * no sane VMM mucks with the core vCPU model on the fly.
+	 * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
+	 * kvm_arch_vcpu_ioctl().
 	 */
-	if (vcpu->arch.last_vmentry_cpu != -1) {
-		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
-		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
-	}
+	KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
 }
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5a403d92833f..3cfaccc24efb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5125,6 +5125,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid cpuid;
 
 		r = -EFAULT;
+
+		/*
+		 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+		 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
+		 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
+		 * faults due to reusing SPs/SPTEs.  Alert userspace, but otherwise
+		 * sweep the problem under the rug.
+		 *
+		 * KVM's horrific CPUID ABI makes the problem all but impossible to
+		 * solve, as correctly handling multiple vCPU models (with respect to
+		 * paging and physical address properties) in a single VM would require
+		 * tracking all relevant CPUID information in kvm_mmu_page_role.  That
+		 * is very undesirable as it would double the memory requirements for
+		 * gfn_track (see struct kvm_mmu_page_role comments), and in practice
+		 * no sane VMM mucks with the core vCPU model on the fly.
+		 */
+		if (vcpu->arch.last_vmentry_cpu != -1)
+			goto out;
+
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
@@ -5135,6 +5154,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid2 cpuid;
 
 		r = -EFAULT;
+
+		/*
+		 * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
+		 * KVM_SET_CPUID case above.
+		 */
+		if (vcpu->arch.last_vmentry_cpu != -1)
+			goto out;
+
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2021-11-22 17:58 ` [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN Vitaly Kuznetsov
@ 2021-11-26 12:20   ` Paolo Bonzini
  2021-12-27 17:32     ` Igor Mammedov
  0 siblings, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2021-11-26 12:20 UTC (permalink / raw)
  To: Vitaly Kuznetsov, kvm
  Cc: Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

On 11/22/21 18:58, Vitaly Kuznetsov wrote:
> -	 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
> -	 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
> -	 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
> -	 * faults due to reusing SPs/SPTEs.  Alert userspace, but otherwise
> -	 * sweep the problem under the rug.
> -	 *
> -	 * KVM's horrific CPUID ABI makes the problem all but impossible to
> -	 * solve, as correctly handling multiple vCPU models (with respect to
> -	 * paging and physical address properties) in a single VM would require
> -	 * tracking all relevant CPUID information in kvm_mmu_page_role.  That
> -	 * is very undesirable as it would double the memory requirements for
> -	 * gfn_track (see struct kvm_mmu_page_role comments), and in practice
> -	 * no sane VMM mucks with the core vCPU model on the fly.
> +	 * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
> +	 * kvm_arch_vcpu_ioctl().
>   	 */

The second part of the comment still applies to kvm_mmu_after_set_cpuid 
more than to kvm_arch_vcpu_ioctl().

>  		r = -EFAULT;
> [...]
> +		if (vcpu->arch.last_vmentry_cpu != -1)
> +			goto out;
> +
>  		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
>  			goto out;
>  		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);

This should be an EINVAL.

Tweaked and queued nevertheless, thanks.

Paolo


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2021-11-26 12:20   ` Paolo Bonzini
@ 2021-12-27 17:32     ` Igor Mammedov
  2022-01-02 17:31       ` Paolo Bonzini
  0 siblings, 1 reply; 36+ messages in thread
From: Igor Mammedov @ 2021-12-27 17:32 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Vitaly Kuznetsov, kvm, Sean Christopherson, Wanpeng Li,
	Jim Mattson, linux-kernel

On Fri, 26 Nov 2021 13:20:28 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 11/22/21 18:58, Vitaly Kuznetsov wrote:
> > -	 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
> > -	 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
> > -	 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
> > -	 * faults due to reusing SPs/SPTEs.  Alert userspace, but otherwise
> > -	 * sweep the problem under the rug.
> > -	 *
> > -	 * KVM's horrific CPUID ABI makes the problem all but impossible to
> > -	 * solve, as correctly handling multiple vCPU models (with respect to
> > -	 * paging and physical address properties) in a single VM would require
> > -	 * tracking all relevant CPUID information in kvm_mmu_page_role.  That
> > -	 * is very undesirable as it would double the memory requirements for
> > -	 * gfn_track (see struct kvm_mmu_page_role comments), and in practice
> > -	 * no sane VMM mucks with the core vCPU model on the fly.
> > +	 * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
> > +	 * kvm_arch_vcpu_ioctl().
> >   	 */  
> 
> The second part of the comment still applies to kvm_mmu_after_set_cpuid 
> more than to kvm_arch_vcpu_ioctl().
> 
> >  		r = -EFAULT;
> > [...]
> > +		if (vcpu->arch.last_vmentry_cpu != -1)
> > +			goto out;
> > +
> >  		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
> >  			goto out;
> >  		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);  
> 
> This should be an EINVAL.
> 
> Tweaked and queued nevertheless, thanks.

it seems this patch breaks VCPU hotplug, in scenario:

  1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
  2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11

> 
> Paolo
> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2021-12-27 17:32     ` Igor Mammedov
@ 2022-01-02 17:31       ` Paolo Bonzini
  2022-01-03  8:04         ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2022-01-02 17:31 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Vitaly Kuznetsov, kvm, Sean Christopherson, Wanpeng Li,
	Jim Mattson, linux-kernel

On 12/27/21 18:32, Igor Mammedov wrote:
>> Tweaked and queued nevertheless, thanks.
> it seems this patch breaks VCPU hotplug, in scenario:
> 
>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> 
> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> 

The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
the data passed to the ioctl is the same that was set before.

Paolo


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-02 17:31       ` Paolo Bonzini
@ 2022-01-03  8:04         ` Vitaly Kuznetsov
  2022-01-03  9:40           ` Igor Mammedov
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-03  8:04 UTC (permalink / raw)
  To: Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 12/27/21 18:32, Igor Mammedov wrote:
>>> Tweaked and queued nevertheless, thanks.
>> it seems this patch breaks VCPU hotplug, in scenario:
>> 
>>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
>>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
>> 
>> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
>> 
>
> The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> the data passed to the ioctl is the same that was set before.

Are we sure the data is going to be *exactly* the same? In particular,
when using vCPU fds from the parked list, do we keep the same
APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
different id?

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-03  8:04         ` Vitaly Kuznetsov
@ 2022-01-03  9:40           ` Igor Mammedov
  2022-01-03 12:56             ` Vitaly Kuznetsov
  2022-01-13 22:33             ` Sean Christopherson
  0 siblings, 2 replies; 36+ messages in thread
From: Igor Mammedov @ 2022-01-03  9:40 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Paolo Bonzini, kvm, Sean Christopherson, Wanpeng Li, Jim Mattson,
	linux-kernel

On Mon, 03 Jan 2022 09:04:29 +0100
Vitaly Kuznetsov <vkuznets@redhat.com> wrote:

> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
> > On 12/27/21 18:32, Igor Mammedov wrote:  
> >>> Tweaked and queued nevertheless, thanks.  
> >> it seems this patch breaks VCPU hotplug, in scenario:
> >> 
> >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
> >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> >> 
> >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> >>   
> >
> > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> > the data passed to the ioctl is the same that was set before.  
> 
> Are we sure the data is going to be *exactly* the same? In particular,
> when using vCPU fds from the parked list, do we keep the same
> APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
> different id?

If I recall it right, it can be a different ID easily.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-03  9:40           ` Igor Mammedov
@ 2022-01-03 12:56             ` Vitaly Kuznetsov
  2022-01-05  8:17               ` Igor Mammedov
  2022-01-05  9:10               ` Paolo Bonzini
  2022-01-13 22:33             ` Sean Christopherson
  1 sibling, 2 replies; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-03 12:56 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Paolo Bonzini, kvm, Sean Christopherson, Wanpeng Li, Jim Mattson,
	linux-kernel

Igor Mammedov <imammedo@redhat.com> writes:

> On Mon, 03 Jan 2022 09:04:29 +0100
> Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
>> Paolo Bonzini <pbonzini@redhat.com> writes:
>> 
>> > On 12/27/21 18:32, Igor Mammedov wrote:  
>> >>> Tweaked and queued nevertheless, thanks.  
>> >> it seems this patch breaks VCPU hotplug, in scenario:
>> >> 
>> >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
>> >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
>> >> 
>> >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
>> >>   
>> >
>> > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
>> > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
>> > the data passed to the ioctl is the same that was set before.  
>> 
>> Are we sure the data is going to be *exactly* the same? In particular,
>> when using vCPU fds from the parked list, do we keep the same
>> APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
>> different id?
>
> If I recall it right, it can be a different ID easily.
>

It's broken then. I'd suggest we revert the patch from KVM and think
about the strategy how to proceed. Going forward, we really want to ban
KVM_SET_CPUID{,2} after KVM_RUN (see the comment which my patch moves).
E.g. we can have an 'allowlist' of things which can change (and put
*APICids there) and only fail KVM_SET_CPUID{,2} when we see something
else changing. In QEMU, we can search the parked CPUs list for an entry
with the right *APICid and reuse it only if we manage to find one.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-03 12:56             ` Vitaly Kuznetsov
@ 2022-01-05  8:17               ` Igor Mammedov
  2022-01-05  9:12                 ` Vitaly Kuznetsov
  2022-01-05  9:10               ` Paolo Bonzini
  1 sibling, 1 reply; 36+ messages in thread
From: Igor Mammedov @ 2022-01-05  8:17 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Paolo Bonzini, kvm, Sean Christopherson, Wanpeng Li, Jim Mattson,
	linux-kernel

On Mon, 03 Jan 2022 13:56:53 +0100
Vitaly Kuznetsov <vkuznets@redhat.com> wrote:

> Igor Mammedov <imammedo@redhat.com> writes:
> 
> > On Mon, 03 Jan 2022 09:04:29 +0100
> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> >  
> >> Paolo Bonzini <pbonzini@redhat.com> writes:
> >>   
> >> > On 12/27/21 18:32, Igor Mammedov wrote:    
> >> >>> Tweaked and queued nevertheless, thanks.    
> >> >> it seems this patch breaks VCPU hotplug, in scenario:
> >> >> 
> >> >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
> >> >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> >> >> 
> >> >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> >> >>     
> >> >
> >> > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> >> > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> >> > the data passed to the ioctl is the same that was set before.    
> >> 
> >> Are we sure the data is going to be *exactly* the same? In particular,
> >> when using vCPU fds from the parked list, do we keep the same
> >> APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
> >> different id?  
> >
> > If I recall it right, it can be a different ID easily.
> >  
> 
> It's broken then. I'd suggest we revert the patch from KVM and think
> about the strategy how to proceed.

Can you post a patch, then?

> Going forward, we really want to ban
> KVM_SET_CPUID{,2} after KVM_RUN (see the comment which my patch moves).
> E.g. we can have an 'allowlist' of things which can change (and put
> *APICids there) and only fail KVM_SET_CPUID{,2} when we see something
> else changing.

It might work, at least on QEMU side we do not allow mixing up CPU models
within VM instance (so far). So aside of APICid (and related leafs
(extended APIC ID/possibly other topo related stuff)) nothing else should
change ever when a new vCPU is hotplugged.

> In QEMU, we can search the parked CPUs list for an entry
> with the right *APICid and reuse it only if we manage to find one.
In QEMU, 'parked cpus' fd list is a generic code shared by all supported
archs. And I'm reluctant to push something x86 specific there (it's not
impossible, but it's a crutch to workaround forbidden KVM_SET_CPUID{,2}).


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-03 12:56             ` Vitaly Kuznetsov
  2022-01-05  8:17               ` Igor Mammedov
@ 2022-01-05  9:10               ` Paolo Bonzini
  2022-01-05 10:09                 ` Vitaly Kuznetsov
  1 sibling, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2022-01-05  9:10 UTC (permalink / raw)
  To: Vitaly Kuznetsov, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

On 1/3/22 13:56, Vitaly Kuznetsov wrote:
>   'allowlist' of things which can change (and put
> *APICids there) and only fail KVM_SET_CPUID{,2} when we see something
> else changing.

We could also go the other way and only deny changes that result in 
changed CPU caps.  That should be easier to implement since we have 
already a mapping from CPU capability words to CPUID leaves and registers.

Paolo


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-05  8:17               ` Igor Mammedov
@ 2022-01-05  9:12                 ` Vitaly Kuznetsov
  0 siblings, 0 replies; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-05  9:12 UTC (permalink / raw)
  To: Igor Mammedov, Paolo Bonzini
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Igor Mammedov <imammedo@redhat.com> writes:

> On Mon, 03 Jan 2022 13:56:53 +0100
> Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
>> Igor Mammedov <imammedo@redhat.com> writes:
>> 
>> > On Mon, 03 Jan 2022 09:04:29 +0100
>> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>> >  
>> >> Paolo Bonzini <pbonzini@redhat.com> writes:
>> >>   
>> >> > On 12/27/21 18:32, Igor Mammedov wrote:    
>> >> >>> Tweaked and queued nevertheless, thanks.    
>> >> >> it seems this patch breaks VCPU hotplug, in scenario:
>> >> >> 
>> >> >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
>> >> >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
>> >> >> 
>> >> >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
>> >> >>     
>> >> >
>> >> > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
>> >> > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
>> >> > the data passed to the ioctl is the same that was set before.    
>> >> 
>> >> Are we sure the data is going to be *exactly* the same? In particular,
>> >> when using vCPU fds from the parked list, do we keep the same
>> >> APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
>> >> different id?  
>> >
>> > If I recall it right, it can be a different ID easily.
>> >  
>> 
>> It's broken then. I'd suggest we revert the patch from KVM and think
>> about the strategy how to proceed.
>
> Can you post a patch, then?
>

Paolo,

would you like to send a last minute revert to Linus to save 5.16 ...

>> Going forward, we really want to ban
>> KVM_SET_CPUID{,2} after KVM_RUN (see the comment which my patch moves).
>> E.g. we can have an 'allowlist' of things which can change (and put
>> *APICids there) and only fail KVM_SET_CPUID{,2} when we see something
>> else changing.
>
> It might work, at least on QEMU side we do not allow mixing up CPU models
> within VM instance (so far). So aside of APICid (and related leafs
> (extended APIC ID/possibly other topo related stuff)) nothing else should
> change ever when a new vCPU is hotplugged.

or should we just focus on this (or similar) solution (for 5.17 and
stable@5.16)?

>
>> In QEMU, we can search the parked CPUs list for an entry
>> with the right *APICid and reuse it only if we manage to find one.
> In QEMU, 'parked cpus' fd list is a generic code shared by all supported
> archs. And I'm reluctant to push something x86 specific there (it's not
> impossible, but it's a crutch to workaround forbidden KVM_SET_CPUID{,2}).
>

You can see it this was: vCPU fd corresponds to the particular CPU being
hot plugged/unplugged, not to any CPU in the guest system. The change
may be generic enough then (but it's not going to save existing QEMUs of
course).

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-05  9:10               ` Paolo Bonzini
@ 2022-01-05 10:09                 ` Vitaly Kuznetsov
  2022-01-07  9:02                   ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-05 10:09 UTC (permalink / raw)
  To: Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 1/3/22 13:56, Vitaly Kuznetsov wrote:
>>   'allowlist' of things which can change (and put
>> *APICids there) and only fail KVM_SET_CPUID{,2} when we see something
>> else changing.
>
> We could also go the other way and only deny changes that result in 
> changed CPU caps.  That should be easier to implement since we have 
> already a mapping from CPU capability words to CPUID leaves and registers.
>

Good idea, I'll look into it (if noone beats me to it).

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-05 10:09                 ` Vitaly Kuznetsov
@ 2022-01-07  9:02                   ` Vitaly Kuznetsov
  2022-01-07 18:15                     ` Paolo Bonzini
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-07  9:02 UTC (permalink / raw)
  To: Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Vitaly Kuznetsov <vkuznets@redhat.com> writes:

> Paolo Bonzini <pbonzini@redhat.com> writes:
>
>> On 1/3/22 13:56, Vitaly Kuznetsov wrote:
>>>   'allowlist' of things which can change (and put
>>> *APICids there) and only fail KVM_SET_CPUID{,2} when we see something
>>> else changing.
>>
>> We could also go the other way and only deny changes that result in 
>> changed CPU caps.  That should be easier to implement since we have 
>> already a mapping from CPU capability words to CPUID leaves and registers.
>>
>
> Good idea, I'll look into it (if noone beats me to it).

(just getting to it)

On the other hand, e.g. MAXPHYADDR (CPUID 0x80000008.EAX) is not a CPU
cap but it's one of the main reasons why we want to forbid
KVM_SET_CPUID{,2} after KVM_RUN in the first place. I'm also not sure
about allowing PV feature bits changes (KVM, Hyper-V, Xen) and I don't
think there's a need for that.

I'm again leaning towards an allowlist and currently I see only two
candidates:

CPUID.01H.EBX bits 31:24 (initial LAPIC id)
CPUID.0BH.EDX (x2APIC id)

Anything else I'm missing?

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-07  9:02                   ` Vitaly Kuznetsov
@ 2022-01-07 18:15                     ` Paolo Bonzini
  2022-01-11  8:00                       ` Igor Mammedov
  0 siblings, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2022-01-07 18:15 UTC (permalink / raw)
  To: Vitaly Kuznetsov, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

On 1/7/22 10:02, Vitaly Kuznetsov wrote:
> 
> I'm again leaning towards an allowlist and currently I see only two
> candidates:
> 
> CPUID.01H.EBX bits 31:24 (initial LAPIC id)
> CPUID.0BH.EDX (x2APIC id)
> 
> Anything else I'm missing?

I would also ignore completely CPUID leaves 03H, 04H, 0BH, 80000005h, 
80000006h, 8000001Dh, 8000001Eh (cache and processor topology), just to 
err on the safe side.

We could change kvm_find_cpuid_entry to WARN if any of those leaves are 
passed.

Paolo


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-07 18:15                     ` Paolo Bonzini
@ 2022-01-11  8:00                       ` Igor Mammedov
  2022-01-12 13:58                         ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Igor Mammedov @ 2022-01-11  8:00 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Vitaly Kuznetsov, kvm, Sean Christopherson, Wanpeng Li,
	Jim Mattson, linux-kernel

On Fri, 7 Jan 2022 19:15:43 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 1/7/22 10:02, Vitaly Kuznetsov wrote:
> > 
> > I'm again leaning towards an allowlist and currently I see only two
> > candidates:
> > 
> > CPUID.01H.EBX bits 31:24 (initial LAPIC id)
> > CPUID.0BH.EDX (x2APIC id)
> > 
> > Anything else I'm missing?  
> 
> I would also ignore completely CPUID leaves 03H, 04H, 0BH, 80000005h, 
> 80000006h, 8000001Dh, 8000001Eh (cache and processor topology), just to 
> err on the safe side.

on top of that,

1Fh

> We could change kvm_find_cpuid_entry to WARN if any of those leaves are 
> passed.
> 
> Paolo
> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-11  8:00                       ` Igor Mammedov
@ 2022-01-12 13:58                         ` Vitaly Kuznetsov
  2022-01-12 18:39                           ` Paolo Bonzini
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-12 13:58 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel,
	Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 793 bytes --]

Igor Mammedov <imammedo@redhat.com> writes:

> On Fri, 7 Jan 2022 19:15:43 +0100
> Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>> On 1/7/22 10:02, Vitaly Kuznetsov wrote:
>> > 
>> > I'm again leaning towards an allowlist and currently I see only two
>> > candidates:
>> > 
>> > CPUID.01H.EBX bits 31:24 (initial LAPIC id)
>> > CPUID.0BH.EDX (x2APIC id)
>> > 
>> > Anything else I'm missing?  
>> 
>> I would also ignore completely CPUID leaves 03H, 04H, 0BH, 80000005h, 
>> 80000006h, 8000001Dh, 8000001Eh (cache and processor topology), just to 
>> err on the safe side.
>
> on top of that,
>
> 1Fh
>

The implementation turned out to be a bit more complex as kvm also
mangles CPUIDs so we need to account for that. Could you give the
attached series a spin to see if it works?

-- 
Vitaly


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-KVM-x86-Fix-indentation-in-kvm_set_cpuid.patch --]
[-- Type: text/x-patch, Size: 1416 bytes --]

From 9b7d89c0a86f52e404278a5dfd86521bff278d17 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 12 Jan 2022 14:41:24 +0100
Subject: [PATCH RFC 1/3] KVM: x86: Fix indentation in kvm_set_cpuid()

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/cpuid.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 07e9215e911d..89af3c7390d3 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -276,21 +276,21 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
 static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
                         int nent)
 {
-    int r;
+	int r;
 
-    r = kvm_check_cpuid(e2, nent);
-    if (r)
-        return r;
+	r = kvm_check_cpuid(e2, nent);
+	if (r)
+		return r;
 
-    kvfree(vcpu->arch.cpuid_entries);
-    vcpu->arch.cpuid_entries = e2;
-    vcpu->arch.cpuid_nent = nent;
+	kvfree(vcpu->arch.cpuid_entries);
+	vcpu->arch.cpuid_entries = e2;
+	vcpu->arch.cpuid_nent = nent;
 
-    kvm_update_kvm_cpuid_base(vcpu);
-    kvm_update_cpuid_runtime(vcpu);
-    kvm_vcpu_after_set_cpuid(vcpu);
+	kvm_update_kvm_cpuid_base(vcpu);
+	kvm_update_cpuid_runtime(vcpu);
+	kvm_vcpu_after_set_cpuid(vcpu);
 
-    return 0;
+	return 0;
 }
 
 /* when an old userspace process fills a new kernel module */
-- 
2.34.1


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0002-KVM-x86-Do-runtime-CPUID-update-before-updating-vcpu.patch --]
[-- Type: text/x-patch, Size: 4057 bytes --]

From c735aa9b4375d37dbd61c7c655d6b007d7d1962c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 12 Jan 2022 14:27:54 +0100
Subject: [PATCH RFC 2/3] KVM: x86: Do runtime CPUID update before updating
 vcpu->arch.cpuid_entries

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/cpuid.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 89af3c7390d3..16f4083edeeb 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -125,14 +125,21 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
 	}
 }
 
-static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
+					      struct kvm_cpuid_entry2 *entries, int nent)
 {
 	u32 base = vcpu->arch.kvm_cpuid_base;
 
 	if (!base)
 		return NULL;
 
-	return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
+	return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, 0);
+}
+
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+{
+	return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
+					     vcpu->arch.cpuid_nent);
 }
 
 void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
@@ -147,11 +154,12 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
 		vcpu->arch.pv_cpuid.features = best->eax;
 }
 
-void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
+				       int nent)
 {
 	struct kvm_cpuid_entry2 *best;
 
-	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	best = cpuid_entry2_find(entries, nent, 1, 0);
 	if (best) {
 		/* Update OSXSAVE bit */
 		if (boot_cpu_has(X86_FEATURE_XSAVE))
@@ -162,33 +170,39 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
 	}
 
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	best = cpuid_entry2_find(entries, nent, 7, 0);
 	if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
 		cpuid_entry_change(best, X86_FEATURE_OSPKE,
 				   kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
 
-	best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+	best = cpuid_entry2_find(entries, nent, 0xD, 0);
 	if (best)
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
 
-	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+	best = cpuid_entry2_find(entries, nent, 0xD, 1);
 	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
 		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-	best = kvm_find_kvm_cpuid_features(vcpu);
+	best = __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
+					     vcpu->arch.cpuid_nent);
 	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
 		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
 		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
 
 	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
-		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+		best = cpuid_entry2_find(entries, nent, 0x1, 0);
 		if (best)
 			cpuid_entry_change(best, X86_FEATURE_MWAIT,
 					   vcpu->arch.ia32_misc_enable_msr &
 					   MSR_IA32_MISC_ENABLE_MWAIT);
 	}
 }
+
+void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+{
+	__kvm_update_cpuid_runtime(vcpu, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+}
 EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
 
 static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
@@ -278,6 +292,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 {
 	int r;
 
+	__kvm_update_cpuid_runtime(vcpu, e2, nent);
+
 	r = kvm_check_cpuid(e2, nent);
 	if (r)
 		return r;
@@ -287,7 +303,6 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	vcpu->arch.cpuid_nent = nent;
 
 	kvm_update_kvm_cpuid_base(vcpu);
-	kvm_update_cpuid_runtime(vcpu);
 	kvm_vcpu_after_set_cpuid(vcpu);
 
 	return 0;
-- 
2.34.1


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: 0003-KVM-x86-Partially-allow-KVM_SET_CPUID-2-after-KVM_RU.patch --]
[-- Type: text/x-patch, Size: 4786 bytes --]

From f29f2c4e48540f3e1214a6ecdd49510465d2d234 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 12 Jan 2022 12:51:01 +0100
Subject: [PATCH RFC 3/3] KVM: x86: Partially allow KVM_SET_CPUID{,2} after
 KVM_RUN for CPU hotplug

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/cpuid.c | 69 +++++++++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.c   | 19 ------------
 2 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 16f4083edeeb..0f130d686323 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -80,9 +80,11 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
 	return NULL;
 }
 
-static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
+static int kvm_check_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
+			   int nent, bool is_update)
 {
-	struct kvm_cpuid_entry2 *best;
+	struct kvm_cpuid_entry2 *best, *e;
+	int i;
 
 	/*
 	 * The existing code assumes virtual address is 48-bit or 57-bit in the
@@ -96,6 +98,58 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
 			return -EINVAL;
 	}
 
+	if (!is_update)
+		return 0;
+
+	/*
+	 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+	 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
+	 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
+	 * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with
+	 * the core vCPU model on the fly. It would've been better to forbid any
+	 * KVM_SET_CPUID{,2} calls after KVM_RUN altogether but unfortunately
+	 * some VMMs (e.g. QEMU) reuse vCPU fds for CPU hotplug/unplug and they
+	 * need to set CPUID to e.g. change [x2]APIC id. Implement an allowlist
+	 * of CPUIDs which are allowed to change.
+	 */
+	for (i = 0; i < nent; i++) {
+		e = &entries[i];
+
+		best = kvm_find_cpuid_entry(vcpu, e->function, e->index);
+		if (!best)
+			return -EINVAL;
+
+		switch (e->function) {
+		case 0x1:
+			/* Only initial LAPIC id is allowed to change */
+			if (e->eax ^ best->eax || ((e->ebx ^ best->ebx) >> 24) ||
+			    e->ecx ^ best->ecx || e->edx ^ best->edx)
+				return -EINVAL;
+			break;
+		case 0x3:
+			/* processor serial number */
+		case 0x4:
+			/* cache parameters */
+		case 0xb:
+		case 0x1f:
+			/* x2APIC id and CPU topology */
+		case 0x80000005:
+			/* AMD l1 cache information */
+		case 0x80000006:
+			/* l2 cache information */
+		case 0x8000001d:
+			/* AMD cache topology */
+		case 0x8000001e:
+			/* AMD processor topology */
+			break;
+		default:
+			if (e->eax ^ best->eax || e->ebx ^ best->ebx ||
+			    e->ecx ^ best->ecx || e->edx ^ best->edx)
+				return -EINVAL;
+			break;
+		}
+	}
+
 	return 0;
 }
 
@@ -291,10 +345,11 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
                         int nent)
 {
 	int r;
+	bool is_update = vcpu->arch.last_vmentry_cpu != -1;
 
 	__kvm_update_cpuid_runtime(vcpu, e2, nent);
 
-	r = kvm_check_cpuid(e2, nent);
+	r = kvm_check_cpuid(vcpu, e2, nent, is_update);
 	if (r)
 		return r;
 
@@ -303,7 +358,13 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	vcpu->arch.cpuid_nent = nent;
 
 	kvm_update_kvm_cpuid_base(vcpu);
-	kvm_vcpu_after_set_cpuid(vcpu);
+
+	/*
+	 * KVM_SET_CPUID{,2} after KVM_RUN is not allowed to change vCPU features, see
+	 * kvm_check_cpuid().
+	 */
+	if (!is_update)
+		kvm_vcpu_after_set_cpuid(vcpu);
 
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e50e97ac4408..285d563af856 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5148,17 +5148,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
 
-		/*
-		 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
-		 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
-		 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
-		 * faults due to reusing SPs/SPTEs.  In practice no sane VMM mucks with
-		 * the core vCPU model on the fly, so fail.
-		 */
-		r = -EINVAL;
-		if (vcpu->arch.last_vmentry_cpu != -1)
-			goto out;
-
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
@@ -5169,14 +5158,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid2 __user *cpuid_arg = argp;
 		struct kvm_cpuid2 cpuid;
 
-		/*
-		 * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
-		 * KVM_SET_CPUID case above.
-		 */
-		r = -EINVAL;
-		if (vcpu->arch.last_vmentry_cpu != -1)
-			goto out;
-
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-12 13:58                         ` Vitaly Kuznetsov
@ 2022-01-12 18:39                           ` Paolo Bonzini
  2022-01-13  9:27                             ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2022-01-12 18:39 UTC (permalink / raw)
  To: Vitaly Kuznetsov, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

On 1/12/22 14:58, Vitaly Kuznetsov wrote:
> -	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> +	best = cpuid_entry2_find(entries, nent, 0xD, 1);
>   	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
>   		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
>   		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>   
> -	best = kvm_find_kvm_cpuid_features(vcpu);
> +	best = __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
> +					     vcpu->arch.cpuid_nent);
>   	if (kvm_hlt_in_guest(vcpu->kvm) && best &&

I think this should be __kvm_find_kvm_cpuid_features(vcpu, entries, nent).

> 
> +		case 0x1:
> +			/* Only initial LAPIC id is allowed to change */
> +			if (e->eax ^ best->eax || ((e->ebx ^ best->ebx) >> 24) ||
> +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
> +				return -EINVAL;
> +			break;

This XOR is a bit weird.  In addition the EBX test is checking the wrong 
bits (it checks whether 31:24 change and ignores changes to 23:0).

You can write just "(e->ebx & ~0xff000000u) != (best->ebx ~0xff000000u)".

> 
> +		default:
> +			if (e->eax ^ best->eax || e->ebx ^ best->ebx ||
> +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
> +				return -EINVAL;

This one even more so.

Paolo


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-12 18:39                           ` Paolo Bonzini
@ 2022-01-13  9:27                             ` Vitaly Kuznetsov
  2022-01-13 14:28                               ` Maxim Levitsky
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-13  9:27 UTC (permalink / raw)
  To: Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 1/12/22 14:58, Vitaly Kuznetsov wrote:
>> -	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
>> +	best = cpuid_entry2_find(entries, nent, 0xD, 1);
>>   	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
>>   		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
>>   		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>>   
>> -	best = kvm_find_kvm_cpuid_features(vcpu);
>> +	best = __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
>> +					     vcpu->arch.cpuid_nent);
>>   	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
>
> I think this should be __kvm_find_kvm_cpuid_features(vcpu, entries, nent).
>

Of course.

>> 
>> +		case 0x1:
>> +			/* Only initial LAPIC id is allowed to change */
>> +			if (e->eax ^ best->eax || ((e->ebx ^ best->ebx) >> 24) ||
>> +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
>> +				return -EINVAL;
>> +			break;
>
> This XOR is a bit weird.  In addition the EBX test is checking the wrong 
> bits (it checks whether 31:24 change and ignores changes to 23:0).

Indeed, however, I've tested CPU hotplug with QEMU trying different
CPUs in random order and surprisingly othing blew up, feels like QEMU
was smart enough to re-use the right fd)

>
> You can write just "(e->ebx & ~0xff000000u) != (best->ebx ~0xff000000u)".
>
>> 
>> +		default:
>> +			if (e->eax ^ best->eax || e->ebx ^ best->ebx ||
>> +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
>> +				return -EINVAL;
>
> This one even more so.

Thanks for the early review, I'm going to prepare a selftest and send
this out.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13  9:27                             ` Vitaly Kuznetsov
@ 2022-01-13 14:28                               ` Maxim Levitsky
  2022-01-13 14:36                                 ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Maxim Levitsky @ 2022-01-13 14:28 UTC (permalink / raw)
  To: Vitaly Kuznetsov, Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

On Thu, 2022-01-13 at 10:27 +0100, Vitaly Kuznetsov wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
> > On 1/12/22 14:58, Vitaly Kuznetsov wrote:
> > > -	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> > > +	best = cpuid_entry2_find(entries, nent, 0xD, 1);
> > >   	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
> > >   		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
> > >   		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> > >   
> > > -	best = kvm_find_kvm_cpuid_features(vcpu);
> > > +	best = __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
> > > +					     vcpu->arch.cpuid_nent);
> > >   	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
> > 
> > I think this should be __kvm_find_kvm_cpuid_features(vcpu, entries, nent).
> > 
> 
> Of course.
> 
> > > +		case 0x1:
> > > +			/* Only initial LAPIC id is allowed to change */
> > > +			if (e->eax ^ best->eax || ((e->ebx ^ best->ebx) >> 24) ||
> > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
> > > +				return -EINVAL;
> > > +			break;
> > 
> > This XOR is a bit weird.  In addition the EBX test is checking the wrong 
> > bits (it checks whether 31:24 change and ignores changes to 23:0).
> 
> Indeed, however, I've tested CPU hotplug with QEMU trying different
> CPUs in random order and surprisingly othing blew up, feels like QEMU
> was smart enough to re-use the right fd)
> 
> > You can write just "(e->ebx & ~0xff000000u) != (best->ebx ~0xff000000u)".
> > 
> > > +		default:
> > > +			if (e->eax ^ best->eax || e->ebx ^ best->ebx ||
> > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
> > > +				return -EINVAL;
> > 
> > This one even more so.
> 
> Thanks for the early review, I'm going to prepare a selftest and send
> this out.
> 
I also looked at this recently (due to other reasons) and I found out that
qemu picks a parked vcpu by its vcpu_id which is its initial apic id,
thus apic id related features should not change.

Take a look at 'kvm_get_vcpu' in qemu source.
Maybe old qemu versions didn't do this?

Best regards,
Thanks,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13 14:28                               ` Maxim Levitsky
@ 2022-01-13 14:36                                 ` Vitaly Kuznetsov
  2022-01-13 14:41                                   ` Maxim Levitsky
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-13 14:36 UTC (permalink / raw)
  To: Maxim Levitsky, Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Maxim Levitsky <mlevitsk@redhat.com> writes:

> On Thu, 2022-01-13 at 10:27 +0100, Vitaly Kuznetsov wrote:
>> Paolo Bonzini <pbonzini@redhat.com> writes:
>> 
>> > On 1/12/22 14:58, Vitaly Kuznetsov wrote:
>> > > -	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
>> > > +	best = cpuid_entry2_find(entries, nent, 0xD, 1);
>> > >   	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
>> > >   		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
>> > >   		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>> > >   
>> > > -	best = kvm_find_kvm_cpuid_features(vcpu);
>> > > +	best = __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
>> > > +					     vcpu->arch.cpuid_nent);
>> > >   	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
>> > 
>> > I think this should be __kvm_find_kvm_cpuid_features(vcpu, entries, nent).
>> > 
>> 
>> Of course.
>> 
>> > > +		case 0x1:
>> > > +			/* Only initial LAPIC id is allowed to change */
>> > > +			if (e->eax ^ best->eax || ((e->ebx ^ best->ebx) >> 24) ||
>> > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
>> > > +				return -EINVAL;
>> > > +			break;
>> > 
>> > This XOR is a bit weird.  In addition the EBX test is checking the wrong 
>> > bits (it checks whether 31:24 change and ignores changes to 23:0).
>> 
>> Indeed, however, I've tested CPU hotplug with QEMU trying different
>> CPUs in random order and surprisingly othing blew up, feels like QEMU
>> was smart enough to re-use the right fd)
>> 
>> > You can write just "(e->ebx & ~0xff000000u) != (best->ebx ~0xff000000u)".
>> > 
>> > > +		default:
>> > > +			if (e->eax ^ best->eax || e->ebx ^ best->ebx ||
>> > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
>> > > +				return -EINVAL;
>> > 
>> > This one even more so.
>> 
>> Thanks for the early review, I'm going to prepare a selftest and send
>> this out.
>> 
> I also looked at this recently (due to other reasons) and I found out that
> qemu picks a parked vcpu by its vcpu_id which is its initial apic id,
> thus apic id related features should not change.
>
> Take a look at 'kvm_get_vcpu' in qemu source.
> Maybe old qemu versions didn't do this?

I took Igor's word on this, I didn't check QEMU code :-)

In the v1 I've just sent [L,x2]APIC ids are allowed to change. This
shouldn't screw the MMU (which was the main motivation for forbidding
KVM_SET_CPUID{,2} after KVM_RUN in the first place) but maybe we don't
really need to be so permissive.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13 14:36                                 ` Vitaly Kuznetsov
@ 2022-01-13 14:41                                   ` Maxim Levitsky
  2022-01-13 14:59                                     ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Maxim Levitsky @ 2022-01-13 14:41 UTC (permalink / raw)
  To: Vitaly Kuznetsov, Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

On Thu, 2022-01-13 at 15:36 +0100, Vitaly Kuznetsov wrote:
> Maxim Levitsky <mlevitsk@redhat.com> writes:
> 
> > On Thu, 2022-01-13 at 10:27 +0100, Vitaly Kuznetsov wrote:
> > > Paolo Bonzini <pbonzini@redhat.com> writes:
> > > 
> > > > On 1/12/22 14:58, Vitaly Kuznetsov wrote:
> > > > > -	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> > > > > +	best = cpuid_entry2_find(entries, nent, 0xD, 1);
> > > > >   	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
> > > > >   		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
> > > > >   		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> > > > >   
> > > > > -	best = kvm_find_kvm_cpuid_features(vcpu);
> > > > > +	best = __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
> > > > > +					     vcpu->arch.cpuid_nent);
> > > > >   	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
> > > > 
> > > > I think this should be __kvm_find_kvm_cpuid_features(vcpu, entries, nent).
> > > > 
> > > 
> > > Of course.
> > > 
> > > > > +		case 0x1:
> > > > > +			/* Only initial LAPIC id is allowed to change */
> > > > > +			if (e->eax ^ best->eax || ((e->ebx ^ best->ebx) >> 24) ||
> > > > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
> > > > > +				return -EINVAL;
> > > > > +			break;
> > > > 
> > > > This XOR is a bit weird.  In addition the EBX test is checking the wrong 
> > > > bits (it checks whether 31:24 change and ignores changes to 23:0).
> > > 
> > > Indeed, however, I've tested CPU hotplug with QEMU trying different
> > > CPUs in random order and surprisingly othing blew up, feels like QEMU
> > > was smart enough to re-use the right fd)
> > > 
> > > > You can write just "(e->ebx & ~0xff000000u) != (best->ebx ~0xff000000u)".
> > > > 
> > > > > +		default:
> > > > > +			if (e->eax ^ best->eax || e->ebx ^ best->ebx ||
> > > > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
> > > > > +				return -EINVAL;
> > > > 
> > > > This one even more so.
> > > 
> > > Thanks for the early review, I'm going to prepare a selftest and send
> > > this out.
> > > 
> > I also looked at this recently (due to other reasons) and I found out that
> > qemu picks a parked vcpu by its vcpu_id which is its initial apic id,
> > thus apic id related features should not change.
> > 
> > Take a look at 'kvm_get_vcpu' in qemu source.
> > Maybe old qemu versions didn't do this?
> 
> I took Igor's word on this, I didn't check QEMU code :-)
> 
> In the v1 I've just sent [L,x2]APIC ids are allowed to change. This
> shouldn't screw the MMU (which was the main motivation for forbidding
> KVM_SET_CPUID{,2} after KVM_RUN in the first place) but maybe we don't
> really need to be so permissive.
> 

For my nested AVIC work I would really want the APIC ID of a VCPU to be read-only
and be equal to vcpu_id.

That simplifies lot of things, and in practice it is hightly likely that no guests
change their APIC id, and likely that qemu doesn't as well.

Best regards,
	Maxim Levitsky





^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13 14:41                                   ` Maxim Levitsky
@ 2022-01-13 14:59                                     ` Vitaly Kuznetsov
  2022-01-13 16:26                                       ` Sean Christopherson
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-13 14:59 UTC (permalink / raw)
  To: Maxim Levitsky, Paolo Bonzini, Igor Mammedov
  Cc: kvm, Sean Christopherson, Wanpeng Li, Jim Mattson, linux-kernel

Maxim Levitsky <mlevitsk@redhat.com> writes:

> On Thu, 2022-01-13 at 15:36 +0100, Vitaly Kuznetsov wrote:
>> Maxim Levitsky <mlevitsk@redhat.com> writes:
>> 
>> > On Thu, 2022-01-13 at 10:27 +0100, Vitaly Kuznetsov wrote:
>> > > Paolo Bonzini <pbonzini@redhat.com> writes:
>> > > 
>> > > > On 1/12/22 14:58, Vitaly Kuznetsov wrote:
>> > > > > -	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
>> > > > > +	best = cpuid_entry2_find(entries, nent, 0xD, 1);
>> > > > >   	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
>> > > > >   		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
>> > > > >   		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>> > > > >   
>> > > > > -	best = kvm_find_kvm_cpuid_features(vcpu);
>> > > > > +	best = __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
>> > > > > +					     vcpu->arch.cpuid_nent);
>> > > > >   	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
>> > > > 
>> > > > I think this should be __kvm_find_kvm_cpuid_features(vcpu, entries, nent).
>> > > > 
>> > > 
>> > > Of course.
>> > > 
>> > > > > +		case 0x1:
>> > > > > +			/* Only initial LAPIC id is allowed to change */
>> > > > > +			if (e->eax ^ best->eax || ((e->ebx ^ best->ebx) >> 24) ||
>> > > > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
>> > > > > +				return -EINVAL;
>> > > > > +			break;
>> > > > 
>> > > > This XOR is a bit weird.  In addition the EBX test is checking the wrong 
>> > > > bits (it checks whether 31:24 change and ignores changes to 23:0).
>> > > 
>> > > Indeed, however, I've tested CPU hotplug with QEMU trying different
>> > > CPUs in random order and surprisingly othing blew up, feels like QEMU
>> > > was smart enough to re-use the right fd)
>> > > 
>> > > > You can write just "(e->ebx & ~0xff000000u) != (best->ebx ~0xff000000u)".
>> > > > 
>> > > > > +		default:
>> > > > > +			if (e->eax ^ best->eax || e->ebx ^ best->ebx ||
>> > > > > +			    e->ecx ^ best->ecx || e->edx ^ best->edx)
>> > > > > +				return -EINVAL;
>> > > > 
>> > > > This one even more so.
>> > > 
>> > > Thanks for the early review, I'm going to prepare a selftest and send
>> > > this out.
>> > > 
>> > I also looked at this recently (due to other reasons) and I found out that
>> > qemu picks a parked vcpu by its vcpu_id which is its initial apic id,
>> > thus apic id related features should not change.
>> > 
>> > Take a look at 'kvm_get_vcpu' in qemu source.
>> > Maybe old qemu versions didn't do this?
>> 
>> I took Igor's word on this, I didn't check QEMU code :-)
>> 
>> In the v1 I've just sent [L,x2]APIC ids are allowed to change. This
>> shouldn't screw the MMU (which was the main motivation for forbidding
>> KVM_SET_CPUID{,2} after KVM_RUN in the first place) but maybe we don't
>> really need to be so permissive.
>> 
>
> For my nested AVIC work I would really want the APIC ID of a VCPU to be read-only
> and be equal to vcpu_id.
>

Doesn't APIC ID have topology encoded in it?

> That simplifies lot of things, and in practice it is hightly likely that no guests
> change their APIC id, and likely that qemu doesn't as well.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13 14:59                                     ` Vitaly Kuznetsov
@ 2022-01-13 16:26                                       ` Sean Christopherson
  2022-01-13 16:30                                         ` Maxim Levitsky
  0 siblings, 1 reply; 36+ messages in thread
From: Sean Christopherson @ 2022-01-13 16:26 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Maxim Levitsky, Paolo Bonzini, Igor Mammedov, kvm, Wanpeng Li,
	Jim Mattson, linux-kernel

On Thu, Jan 13, 2022, Vitaly Kuznetsov wrote:
> Maxim Levitsky <mlevitsk@redhat.com> writes:
> > For my nested AVIC work I would really want the APIC ID of a VCPU to be read-only
> > and be equal to vcpu_id.
> >
> 
> Doesn't APIC ID have topology encoded in it?

Yeah, APIC IDs are derived from the topology.  From the SDM (this doesn't
talk about core/SMT info, but that's included as well):

  The hardware assigned APIC ID is based on system topology and includes encoding
  for socket position and cluster information.

The SDM also says:

  Some processors permit software to modify the APIC ID. However, the ability of
  software to modify the APIC ID is processor model specific.

So I _think_ we could define KVM behavior to ignore writes from the _guest_, but
the APIC_ID == vcpu_id requirement won't fly as userspace expects to be able to
stuff virtual toplogy info into the APIC ID.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13 16:26                                       ` Sean Christopherson
@ 2022-01-13 16:30                                         ` Maxim Levitsky
  0 siblings, 0 replies; 36+ messages in thread
From: Maxim Levitsky @ 2022-01-13 16:30 UTC (permalink / raw)
  To: Sean Christopherson, Vitaly Kuznetsov
  Cc: Paolo Bonzini, Igor Mammedov, kvm, Wanpeng Li, Jim Mattson, linux-kernel

On Thu, 2022-01-13 at 16:26 +0000, Sean Christopherson wrote:
> On Thu, Jan 13, 2022, Vitaly Kuznetsov wrote:
> > Maxim Levitsky <mlevitsk@redhat.com> writes:
> > > For my nested AVIC work I would really want the APIC ID of a VCPU to be read-only
> > > and be equal to vcpu_id.
> > > 
> > 
> > Doesn't APIC ID have topology encoded in it?
> 
> Yeah, APIC IDs are derived from the topology.  From the SDM (this doesn't
> talk about core/SMT info, but that's included as well):
> 
>   The hardware assigned APIC ID is based on system topology and includes encoding
>   for socket position and cluster information.
> 
> The SDM also says:
> 
>   Some processors permit software to modify the APIC ID. However, the ability of
>   software to modify the APIC ID is processor model specific.
> 
> So I _think_ we could define KVM behavior to ignore writes from the _guest_, but
> the APIC_ID == vcpu_id requirement won't fly as userspace expects to be able to
> stuff virtual toplogy info into the APIC ID.
> 
That is a very good piece of information! Thanks!

Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-03  9:40           ` Igor Mammedov
  2022-01-03 12:56             ` Vitaly Kuznetsov
@ 2022-01-13 22:33             ` Sean Christopherson
  2022-01-14  8:28               ` Maxim Levitsky
  2022-01-14  8:55               ` Igor Mammedov
  1 sibling, 2 replies; 36+ messages in thread
From: Sean Christopherson @ 2022-01-13 22:33 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Vitaly Kuznetsov, Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson,
	linux-kernel

On Mon, Jan 03, 2022, Igor Mammedov wrote:
> On Mon, 03 Jan 2022 09:04:29 +0100
> Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> 
> > Paolo Bonzini <pbonzini@redhat.com> writes:
> > 
> > > On 12/27/21 18:32, Igor Mammedov wrote:  
> > >>> Tweaked and queued nevertheless, thanks.  
> > >> it seems this patch breaks VCPU hotplug, in scenario:
> > >> 
> > >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
> > >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> > >> 
> > >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> > >>   
> > >
> > > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> > > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> > > the data passed to the ioctl is the same that was set before.  
> > 
> > Are we sure the data is going to be *exactly* the same? In particular,
> > when using vCPU fds from the parked list, do we keep the same
> > APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
> > different id?
> 
> If I recall it right, it can be a different ID easily.

No, it cannot.  KVM doesn't provide a way for userspace to change the APIC ID of
a vCPU after the vCPU is created.  x2APIC flat out disallows changing the APIC ID,
and unless there's magic I'm missing, apic_mmio_write() => kvm_lapic_reg_write()
is not reachable from userspace.

The only way for userspace to set the APIC ID is to change vcpu->vcpu_id, and that
can only be done at KVM_VCPU_CREATE.

So, reusing a parked vCPU for hotplug must reuse the same APIC ID.  QEMU handles
this by stashing the vcpu_id, a.k.a. APIC ID, when parking a vCPU, and reuses a
parked vCPU if and only if it has the same APIC ID.  And because QEMU derives the
APIC ID from topology, that means all the topology CPUID leafs must remain the
same, otherwise the guest is hosed because it will send IPIs to the wrong vCPUs.

  static int do_kvm_destroy_vcpu(CPUState *cpu)
  {
    struct KVMParkedVcpu *vcpu = NULL;

    ...

    vcpu = g_malloc0(sizeof(*vcpu));
    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); <=== stash the APIC ID when parking
    vcpu->kvm_fd = cpu->kvm_fd;
    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
err:
    return ret;
  }

  static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
  {
    struct KVMParkedVcpu *cpu;

    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
        if (cpu->vcpu_id == vcpu_id) {  <=== reuse if APIC ID matches
            int kvm_fd;

            QLIST_REMOVE(cpu, node);
            kvm_fd = cpu->kvm_fd;
            g_free(cpu);
            return kvm_fd;
        }
    }

    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
  }

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13 22:33             ` Sean Christopherson
@ 2022-01-14  8:28               ` Maxim Levitsky
  2022-01-14 16:08                 ` Sean Christopherson
  2022-01-14  8:55               ` Igor Mammedov
  1 sibling, 1 reply; 36+ messages in thread
From: Maxim Levitsky @ 2022-01-14  8:28 UTC (permalink / raw)
  To: Sean Christopherson, Igor Mammedov
  Cc: Vitaly Kuznetsov, Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson,
	linux-kernel

On Thu, 2022-01-13 at 22:33 +0000, Sean Christopherson wrote:
> On Mon, Jan 03, 2022, Igor Mammedov wrote:
> > On Mon, 03 Jan 2022 09:04:29 +0100
> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> > 
> > > Paolo Bonzini <pbonzini@redhat.com> writes:
> > > 
> > > > On 12/27/21 18:32, Igor Mammedov wrote:  
> > > > > > Tweaked and queued nevertheless, thanks.  
> > > > > it seems this patch breaks VCPU hotplug, in scenario:
> > > > > 
> > > > >    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
> > > > >    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> > > > > 
> > > > > RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> > > > >   
> > > > 
> > > > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> > > > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> > > > the data passed to the ioctl is the same that was set before.  
> > > 
> > > Are we sure the data is going to be *exactly* the same? In particular,
> > > when using vCPU fds from the parked list, do we keep the same
> > > APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
> > > different id?
> > 
> > If I recall it right, it can be a different ID easily.
> 
> No, it cannot.  KVM doesn't provide a way for userspace to change the APIC ID of
> a vCPU after the vCPU is created.  x2APIC flat out disallows changing the APIC ID,
> and unless there's magic I'm missing, apic_mmio_write() => kvm_lapic_reg_write()
> is not reachable from userspace.

So after all, it is true that vcpu_id == initial APIC_ID,
and if we don't let guest change it, it will be always like that? 


You said that its not true in the other mail in the thread. 
I haven't checked it in the code yet, as I never was much worried about userspace changing,
but I will check it soon.

I did a quick look and I see that at least the userspace can call 'kvm_apic_set_state' and it 
contains snapshot of all apic registers, including apic id.
However it would be very easy to add a check
there and fail if userspace attempts to
set APIC_ID != vcpu_id.


Best regards,
	Maxim Levitsky
> 
> The only way for userspace to set the APIC ID is to change vcpu->vcpu_id, and that
> can only be done at KVM_VCPU_CREATE.
> 
> So, reusing a parked vCPU for hotplug must reuse the same APIC ID.  QEMU handles
> this by stashing the vcpu_id, a.k.a. APIC ID, when parking a vCPU, and reuses a
> parked vCPU if and only if it has the same APIC ID.  And because QEMU derives the
> APIC ID from topology, that means all the topology CPUID leafs must remain the
> same, otherwise the guest is hosed because it will send IPIs to the wrong vCPUs.
> 
>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>   {
>     struct KVMParkedVcpu *vcpu = NULL;
> 
>     ...
> 
>     vcpu = g_malloc0(sizeof(*vcpu));
>     vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); <=== stash the APIC ID when parking
>     vcpu->kvm_fd = cpu->kvm_fd;
>     QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> err:
>     return ret;
>   }
> 
>   static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>   {
>     struct KVMParkedVcpu *cpu;
> 
>     QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
>         if (cpu->vcpu_id == vcpu_id) {  <=== reuse if APIC ID matches
>             int kvm_fd;
> 
>             QLIST_REMOVE(cpu, node);
>             kvm_fd = cpu->kvm_fd;
>             g_free(cpu);
>             return kvm_fd;
>         }
>     }
> 
>     return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>   }
> 



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-13 22:33             ` Sean Christopherson
  2022-01-14  8:28               ` Maxim Levitsky
@ 2022-01-14  8:55               ` Igor Mammedov
  2022-01-14  9:31                 ` Vitaly Kuznetsov
  1 sibling, 1 reply; 36+ messages in thread
From: Igor Mammedov @ 2022-01-14  8:55 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Vitaly Kuznetsov, Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson,
	linux-kernel

On Thu, 13 Jan 2022 22:33:38 +0000
Sean Christopherson <seanjc@google.com> wrote:

> On Mon, Jan 03, 2022, Igor Mammedov wrote:
> > On Mon, 03 Jan 2022 09:04:29 +0100
> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> >   
> > > Paolo Bonzini <pbonzini@redhat.com> writes:
> > >   
> > > > On 12/27/21 18:32, Igor Mammedov wrote:    
> > > >>> Tweaked and queued nevertheless, thanks.    
> > > >> it seems this patch breaks VCPU hotplug, in scenario:
> > > >> 
> > > >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
> > > >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> > > >> 
> > > >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> > > >>     
> > > >
> > > > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> > > > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> > > > the data passed to the ioctl is the same that was set before.    
> > > 
> > > Are we sure the data is going to be *exactly* the same? In particular,
> > > when using vCPU fds from the parked list, do we keep the same
> > > APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
> > > different id?  
> > 
> > If I recall it right, it can be a different ID easily.  
> 
> No, it cannot.  KVM doesn't provide a way for userspace to change the APIC ID of
> a vCPU after the vCPU is created.  x2APIC flat out disallows changing the APIC ID,
> and unless there's magic I'm missing, apic_mmio_write() => kvm_lapic_reg_write()
> is not reachable from userspace.
> 
> The only way for userspace to set the APIC ID is to change vcpu->vcpu_id, and that
> can only be done at KVM_VCPU_CREATE.
> 
> So, reusing a parked vCPU for hotplug must reuse the same APIC ID.  QEMU handles
> this by stashing the vcpu_id, a.k.a. APIC ID, when parking a vCPU, and reuses a
> parked vCPU if and only if it has the same APIC ID.  And because QEMU derives the
> APIC ID from topology, that means all the topology CPUID leafs must remain the
> same, otherwise the guest is hosed because it will send IPIs to the wrong vCPUs.

Indeed, I was wrong.
I just checked all cpu unplug history in qemu. It was introduced in qemu-2.7
and from the very beginning it did stash vcpu_id,
so there is no old QEMU that would re-plug VCPU with different apic_id.
Though tells us nothing about what other userspace implementations might do.

However, a problem of failing KVM_SET_CPUID2 during VCPU re-plug
is still there and re-plug will fail if KVM rejects repeated KVM_SET_CPUID2
even if ioctl called with exactly the same CPUID leafs as the 1st call.


>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>   {
>     struct KVMParkedVcpu *vcpu = NULL;
> 
>     ...
> 
>     vcpu = g_malloc0(sizeof(*vcpu));
>     vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); <=== stash the APIC ID when parking
>     vcpu->kvm_fd = cpu->kvm_fd;
>     QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> err:
>     return ret;
>   }
> 
>   static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>   {
>     struct KVMParkedVcpu *cpu;
> 
>     QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
>         if (cpu->vcpu_id == vcpu_id) {  <=== reuse if APIC ID matches
>             int kvm_fd;
> 
>             QLIST_REMOVE(cpu, node);
>             kvm_fd = cpu->kvm_fd;
>             g_free(cpu);
>             return kvm_fd;
>         }
>     }
> 
>     return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>   }
> 


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-14  8:55               ` Igor Mammedov
@ 2022-01-14  9:31                 ` Vitaly Kuznetsov
  2022-01-14 11:22                   ` Igor Mammedov
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-14  9:31 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson, linux-kernel,
	Sean Christopherson

Igor Mammedov <imammedo@redhat.com> writes:

> On Thu, 13 Jan 2022 22:33:38 +0000
> Sean Christopherson <seanjc@google.com> wrote:
>
>> On Mon, Jan 03, 2022, Igor Mammedov wrote:
>> > On Mon, 03 Jan 2022 09:04:29 +0100
>> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>> >   
>> > > Paolo Bonzini <pbonzini@redhat.com> writes:
>> > >   
>> > > > On 12/27/21 18:32, Igor Mammedov wrote:    
>> > > >>> Tweaked and queued nevertheless, thanks.    
>> > > >> it seems this patch breaks VCPU hotplug, in scenario:
>> > > >> 
>> > > >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
>> > > >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
>> > > >> 
>> > > >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
>> > > >>     
>> > > >
>> > > > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
>> > > > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
>> > > > the data passed to the ioctl is the same that was set before.    
>> > > 
>> > > Are we sure the data is going to be *exactly* the same? In particular,
>> > > when using vCPU fds from the parked list, do we keep the same
>> > > APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
>> > > different id?  
>> > 
>> > If I recall it right, it can be a different ID easily.  
>> 
>> No, it cannot.  KVM doesn't provide a way for userspace to change the APIC ID of
>> a vCPU after the vCPU is created.  x2APIC flat out disallows changing the APIC ID,
>> and unless there's magic I'm missing, apic_mmio_write() => kvm_lapic_reg_write()
>> is not reachable from userspace.
>> 
>> The only way for userspace to set the APIC ID is to change vcpu->vcpu_id, and that
>> can only be done at KVM_VCPU_CREATE.
>> 
>> So, reusing a parked vCPU for hotplug must reuse the same APIC ID.  QEMU handles
>> this by stashing the vcpu_id, a.k.a. APIC ID, when parking a vCPU, and reuses a
>> parked vCPU if and only if it has the same APIC ID.  And because QEMU derives the
>> APIC ID from topology, that means all the topology CPUID leafs must remain the
>> same, otherwise the guest is hosed because it will send IPIs to the wrong vCPUs.
>
> Indeed, I was wrong.
> I just checked all cpu unplug history in qemu. It was introduced in qemu-2.7
> and from the very beginning it did stash vcpu_id,
> so there is no old QEMU that would re-plug VCPU with different apic_id.
> Though tells us nothing about what other userspace implementations might do.
>

The genie is out of the bottle already, 5.16 is released with the change
(which was promissed for some time, KVM was complaining with
pr_warn_ratelimited()). I'd be brave and say that if QEMU doesn't need
it then nobody else does (out of curiosity, are there KVM VMMs besides
QEMU which support CPU hotplug out there?).

> However, a problem of failing KVM_SET_CPUID2 during VCPU re-plug
> is still there and re-plug will fail if KVM rejects repeated KVM_SET_CPUID2
> even if ioctl called with exactly the same CPUID leafs as the 1st call.
>

Assuming APIC id change doesn not need to be supported, I can send v2
here with an empty allowlist.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-14  9:31                 ` Vitaly Kuznetsov
@ 2022-01-14 11:22                   ` Igor Mammedov
  2022-01-14 12:25                     ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Igor Mammedov @ 2022-01-14 11:22 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson, linux-kernel,
	Sean Christopherson

On Fri, 14 Jan 2022 10:31:50 +0100
Vitaly Kuznetsov <vkuznets@redhat.com> wrote:

> Igor Mammedov <imammedo@redhat.com> writes:
> 
> > On Thu, 13 Jan 2022 22:33:38 +0000
> > Sean Christopherson <seanjc@google.com> wrote:
> >  
> >> On Mon, Jan 03, 2022, Igor Mammedov wrote:  
> >> > On Mon, 03 Jan 2022 09:04:29 +0100
> >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> >> >     
> >> > > Paolo Bonzini <pbonzini@redhat.com> writes:
> >> > >     
> >> > > > On 12/27/21 18:32, Igor Mammedov wrote:      
> >> > > >>> Tweaked and queued nevertheless, thanks.      
> >> > > >> it seems this patch breaks VCPU hotplug, in scenario:
> >> > > >> 
> >> > > >>    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
> >> > > >>    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> >> > > >> 
> >> > > >> RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> >> > > >>       
> >> > > >
> >> > > > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> >> > > > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> >> > > > the data passed to the ioctl is the same that was set before.      
> >> > > 
> >> > > Are we sure the data is going to be *exactly* the same? In particular,
> >> > > when using vCPU fds from the parked list, do we keep the same
> >> > > APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
> >> > > different id?    
> >> > 
> >> > If I recall it right, it can be a different ID easily.    
> >> 
> >> No, it cannot.  KVM doesn't provide a way for userspace to change the APIC ID of
> >> a vCPU after the vCPU is created.  x2APIC flat out disallows changing the APIC ID,
> >> and unless there's magic I'm missing, apic_mmio_write() => kvm_lapic_reg_write()
> >> is not reachable from userspace.
> >> 
> >> The only way for userspace to set the APIC ID is to change vcpu->vcpu_id, and that
> >> can only be done at KVM_VCPU_CREATE.
> >> 
> >> So, reusing a parked vCPU for hotplug must reuse the same APIC ID.  QEMU handles
> >> this by stashing the vcpu_id, a.k.a. APIC ID, when parking a vCPU, and reuses a
> >> parked vCPU if and only if it has the same APIC ID.  And because QEMU derives the
> >> APIC ID from topology, that means all the topology CPUID leafs must remain the
> >> same, otherwise the guest is hosed because it will send IPIs to the wrong vCPUs.  
> >
> > Indeed, I was wrong.
> > I just checked all cpu unplug history in qemu. It was introduced in qemu-2.7
> > and from the very beginning it did stash vcpu_id,
> > so there is no old QEMU that would re-plug VCPU with different apic_id.
> > Though tells us nothing about what other userspace implementations might do.
> >  
> 
> The genie is out of the bottle already, 5.16 is released with the change
> (which was promissed for some time, KVM was complaining with
> pr_warn_ratelimited()). I'd be brave and say that if QEMU doesn't need
> it then nobody else does (out of curiosity, are there KVM VMMs besides
> QEMU which support CPU hotplug out there?).
> 
> > However, a problem of failing KVM_SET_CPUID2 during VCPU re-plug
> > is still there and re-plug will fail if KVM rejects repeated KVM_SET_CPUID2
> > even if ioctl called with exactly the same CPUID leafs as the 1st call.
> >  
> 
> Assuming APIC id change doesn not need to be supported, I can send v2
> here with an empty allowlist.
As you mentioned in another thread black list would be better
to address Sean's concerns or just revert problematic commit.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-14 11:22                   ` Igor Mammedov
@ 2022-01-14 12:25                     ` Vitaly Kuznetsov
  2022-01-14 17:00                       ` Sean Christopherson
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-14 12:25 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson, linux-kernel,
	Sean Christopherson

Igor Mammedov <imammedo@redhat.com> writes:

> On Fri, 14 Jan 2022 10:31:50 +0100
> Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
>> Igor Mammedov <imammedo@redhat.com> writes:
>> 
>> 
>> > However, a problem of failing KVM_SET_CPUID2 during VCPU re-plug
>> > is still there and re-plug will fail if KVM rejects repeated KVM_SET_CPUID2
>> > even if ioctl called with exactly the same CPUID leafs as the 1st call.
>> >  
>> 
>> Assuming APIC id change doesn not need to be supported, I can send v2
>> here with an empty allowlist.
> As you mentioned in another thread black list would be better
> to address Sean's concerns or just revert problematic commit.
>

Personally, I'm leaning towards the blocklist approach even if just for
'documenting' the fact that KVM doesn't correctly handle the
change. Compared to a comment in the code, such approach could help
someone save tons of debugging time (if anyone ever decides do something
weird, like changing MAXPHYADDR on the fly).

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-14  8:28               ` Maxim Levitsky
@ 2022-01-14 16:08                 ` Sean Christopherson
  0 siblings, 0 replies; 36+ messages in thread
From: Sean Christopherson @ 2022-01-14 16:08 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Igor Mammedov, Vitaly Kuznetsov, Paolo Bonzini, kvm, Wanpeng Li,
	Jim Mattson, linux-kernel

On Fri, Jan 14, 2022, Maxim Levitsky wrote:
> On Thu, 2022-01-13 at 22:33 +0000, Sean Christopherson wrote:
> > On Mon, Jan 03, 2022, Igor Mammedov wrote:
> > > On Mon, 03 Jan 2022 09:04:29 +0100
> > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> > > 
> > > > Paolo Bonzini <pbonzini@redhat.com> writes:
> > > > 
> > > > > On 12/27/21 18:32, Igor Mammedov wrote:  
> > > > > > > Tweaked and queued nevertheless, thanks.  
> > > > > > it seems this patch breaks VCPU hotplug, in scenario:
> > > > > > 
> > > > > >    1. hotunplug existing VCPU (QEMU stores VCPU file descriptor in parked cpus list)
> > > > > >    2. hotplug it again (unsuspecting QEMU reuses stored file descriptor when recreating VCPU)
> > > > > > 
> > > > > > RHBZ:https://bugzilla.redhat.com/show_bug.cgi?id=2028337#c11
> > > > > >   
> > > > > 
> > > > > The fix here would be (in QEMU) to not call KVM_SET_CPUID2 again. 
> > > > > However, we need to work around it in KVM, and allow KVM_SET_CPUID2 if 
> > > > > the data passed to the ioctl is the same that was set before.  
> > > > 
> > > > Are we sure the data is going to be *exactly* the same? In particular,
> > > > when using vCPU fds from the parked list, do we keep the same
> > > > APIC/x2APIC id when hotplugging? Or can we actually hotplug with a
> > > > different id?
> > > 
> > > If I recall it right, it can be a different ID easily.
> > 
> > No, it cannot.  KVM doesn't provide a way for userspace to change the APIC ID of
> > a vCPU after the vCPU is created.  x2APIC flat out disallows changing the APIC ID,
> > and unless there's magic I'm missing, apic_mmio_write() => kvm_lapic_reg_write()
> > is not reachable from userspace.
> 
> So after all, it is true that vcpu_id == initial APIC_ID,
> and if we don't let guest change it, it will be always like that?

Except for kvm_apic_set_state(), which I forgot existed, yes.

> You said that its not true in the other mail in the thread. 

I was wrong, I was thinking that userspace could reach kvm_lapic_reg_write(), but
I forgot that there would be no connection without x2apic.  But I forgot about
kvm_apic_set_state()...

> I haven't checked it in the code yet, as I never was much worried about
> userspace changing, but I will check it soon.
> 
> I did a quick look and I see that at least the userspace can call
> 'kvm_apic_set_state' and it contains snapshot of all apic registers,
> including apic id.  However it would be very easy to add a check there and
> fail if userspace attempts to set APIC_ID != vcpu_id.

Yeah, hopefully that doesn't break any userspace.  I can't imagine it would,
because if the guest disabled and re-enabled the APIC, kvm_lapic_set_base() would
restore the APIC ID to vcpu_id.

With luck, that's the last hole we need to close...

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-14 12:25                     ` Vitaly Kuznetsov
@ 2022-01-14 17:00                       ` Sean Christopherson
  2022-01-17  9:55                         ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Sean Christopherson @ 2022-01-14 17:00 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Igor Mammedov, Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson, linux-kernel

On Fri, Jan 14, 2022, Vitaly Kuznetsov wrote:
> Igor Mammedov <imammedo@redhat.com> writes:
> 
> > On Fri, 14 Jan 2022 10:31:50 +0100
> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> >
> >> Igor Mammedov <imammedo@redhat.com> writes:
> >> 
> >> 
> >> > However, a problem of failing KVM_SET_CPUID2 during VCPU re-plug
> >> > is still there and re-plug will fail if KVM rejects repeated KVM_SET_CPUID2
> >> > even if ioctl called with exactly the same CPUID leafs as the 1st call.
> >> >  
> >> 
> >> Assuming APIC id change doesn not need to be supported, I can send v2
> >> here with an empty allowlist.
> > As you mentioned in another thread black list would be better
> > to address Sean's concerns or just revert problematic commit.
> >
> 
> Personally, I'm leaning towards the blocklist approach even if just for
> 'documenting' the fact that KVM doesn't correctly handle the
> change. Compared to a comment in the code, such approach could help
> someone save tons of debugging time (if anyone ever decides do something
> weird, like changing MAXPHYADDR on the fly).

I assume the blocklist approach is let userspace opt into rejecting KVM_SET_CPUID{,2},
but allow all CPUID leafs and sub-leafs to be modified at will by default?  I don't
dislike the idea, but I wonder if it's unnecessarily fancy.

What if we instead provide an ioctl/capability to let userspace toggle disabling
of KVM_SET_CPUID{,2}, a la STAC/CLAC to override SMAP?  E.g. QEMU could enable
protections after initially creating the vCPU, then temporarily disable protections
only for the hotplug path?

That'd provide solid protections for minimal effort, and if userspace can restrict
the danger zone to one specific path, then userspace can easily do its own auditing
for that one path.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-14 17:00                       ` Sean Christopherson
@ 2022-01-17  9:55                         ` Vitaly Kuznetsov
  2022-01-17 11:20                           ` Paolo Bonzini
  0 siblings, 1 reply; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-17  9:55 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Igor Mammedov, Paolo Bonzini, kvm, Wanpeng Li, Jim Mattson, linux-kernel

Sean Christopherson <seanjc@google.com> writes:

> On Fri, Jan 14, 2022, Vitaly Kuznetsov wrote:
>> Igor Mammedov <imammedo@redhat.com> writes:
>> 
>> > On Fri, 14 Jan 2022 10:31:50 +0100
>> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>> >
>> >> Igor Mammedov <imammedo@redhat.com> writes:
>> >> 
>> >> 
>> >> > However, a problem of failing KVM_SET_CPUID2 during VCPU re-plug
>> >> > is still there and re-plug will fail if KVM rejects repeated KVM_SET_CPUID2
>> >> > even if ioctl called with exactly the same CPUID leafs as the 1st call.
>> >> >  
>> >> 
>> >> Assuming APIC id change doesn not need to be supported, I can send v2
>> >> here with an empty allowlist.
>> > As you mentioned in another thread black list would be better
>> > to address Sean's concerns or just revert problematic commit.
>> >
>> 
>> Personally, I'm leaning towards the blocklist approach even if just for
>> 'documenting' the fact that KVM doesn't correctly handle the
>> change. Compared to a comment in the code, such approach could help
>> someone save tons of debugging time (if anyone ever decides do something
>> weird, like changing MAXPHYADDR on the fly).
>
> I assume the blocklist approach is let userspace opt into rejecting KVM_SET_CPUID{,2},
> but allow all CPUID leafs and sub-leafs to be modified at will by
> default? 

No, honestly I was thinking about something much simpler: instead of
forbidding KVM_SET_CPUID{,2} after KVM_RUN completely (what we have now
in 5.16), we only forbid to change certain data which we know breaks
some assumptions in MMU, from the comment:
"
         * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
         * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
         * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
         * faults due to reusing SPs/SPTEs.
"
It seems that CPU hotplug path doesn't need to change these so we don't
need an opt-in/opt-out, we can just forbid changing certain things for
the time being. Alternatively, we can silently ignore such changes but I
don't quite like it because it would mask bugs in VMMs.

> I don't dislike the idea, but I wonder if it's unnecessarily fancy.
>
> What if we instead provide an ioctl/capability to let userspace toggle disabling
> of KVM_SET_CPUID{,2}, a la STAC/CLAC to override SMAP?  E.g. QEMU could enable
> protections after initially creating the vCPU, then temporarily
> disable protections only for the hotplug path?
>
> That'd provide solid protections for minimal effort, and if userspace can restrict
> the danger zone to one specific path, then userspace can easily do its own auditing
> for that one path.

Could work but it seems the protection would only "protect" VMM from
shooting itself in the foot and will likely result in killing the guest
anyway so I'm wondering if it's worth it.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-17  9:55                         ` Vitaly Kuznetsov
@ 2022-01-17 11:20                           ` Paolo Bonzini
  2022-01-17 13:02                             ` Vitaly Kuznetsov
  0 siblings, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2022-01-17 11:20 UTC (permalink / raw)
  To: Vitaly Kuznetsov, Sean Christopherson
  Cc: Igor Mammedov, kvm, Wanpeng Li, Jim Mattson, linux-kernel

On 1/17/22 10:55, Vitaly Kuznetsov wrote:
> No, honestly I was thinking about something much simpler: instead of
> forbidding KVM_SET_CPUID{,2} after KVM_RUN completely (what we have now
> in 5.16), we only forbid to change certain data which we know breaks
> some assumptions in MMU, from the comment:
> "
>           * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
>           * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
>           * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
>           * faults due to reusing SPs/SPTEs.
> "
> It seems that CPU hotplug path doesn't need to change these so we don't
> need an opt-in/opt-out, we can just forbid changing certain things for
> the time being. Alternatively, we can silently ignore such changes but I
> don't quite like it because it would mask bugs in VMMs.

I think the version that only allows exactly the same CPUID is the best, 
as it leaves less room for future bugs.

Paolo


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN
  2022-01-17 11:20                           ` Paolo Bonzini
@ 2022-01-17 13:02                             ` Vitaly Kuznetsov
  0 siblings, 0 replies; 36+ messages in thread
From: Vitaly Kuznetsov @ 2022-01-17 13:02 UTC (permalink / raw)
  To: Paolo Bonzini, Sean Christopherson
  Cc: Igor Mammedov, kvm, Wanpeng Li, Jim Mattson, linux-kernel

Paolo Bonzini <pbonzini@redhat.com> writes:

> On 1/17/22 10:55, Vitaly Kuznetsov wrote:
>> No, honestly I was thinking about something much simpler: instead of
>> forbidding KVM_SET_CPUID{,2} after KVM_RUN completely (what we have now
>> in 5.16), we only forbid to change certain data which we know breaks
>> some assumptions in MMU, from the comment:
>> "
>>           * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
>>           * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
>>           * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
>>           * faults due to reusing SPs/SPTEs.
>> "
>> It seems that CPU hotplug path doesn't need to change these so we don't
>> need an opt-in/opt-out, we can just forbid changing certain things for
>> the time being. Alternatively, we can silently ignore such changes but I
>> don't quite like it because it would mask bugs in VMMs.
>
> I think the version that only allows exactly the same CPUID is the best, 
> as it leaves less room for future bugs.
>

Ok, I hear your vote) Will prepare v2.

-- 
Vitaly


^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2022-01-17 13:02 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-22 17:58 [PATCH 0/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN Vitaly Kuznetsov
2021-11-22 17:58 ` [PATCH 1/2] KVM: selftests: Avoid KVM_SET_CPUID2 after KVM_RUN in hyperv_features test Vitaly Kuznetsov
2021-11-22 17:58 ` [PATCH 2/2] KVM: x86: Forbid KVM_SET_CPUID{,2} after KVM_RUN Vitaly Kuznetsov
2021-11-26 12:20   ` Paolo Bonzini
2021-12-27 17:32     ` Igor Mammedov
2022-01-02 17:31       ` Paolo Bonzini
2022-01-03  8:04         ` Vitaly Kuznetsov
2022-01-03  9:40           ` Igor Mammedov
2022-01-03 12:56             ` Vitaly Kuznetsov
2022-01-05  8:17               ` Igor Mammedov
2022-01-05  9:12                 ` Vitaly Kuznetsov
2022-01-05  9:10               ` Paolo Bonzini
2022-01-05 10:09                 ` Vitaly Kuznetsov
2022-01-07  9:02                   ` Vitaly Kuznetsov
2022-01-07 18:15                     ` Paolo Bonzini
2022-01-11  8:00                       ` Igor Mammedov
2022-01-12 13:58                         ` Vitaly Kuznetsov
2022-01-12 18:39                           ` Paolo Bonzini
2022-01-13  9:27                             ` Vitaly Kuznetsov
2022-01-13 14:28                               ` Maxim Levitsky
2022-01-13 14:36                                 ` Vitaly Kuznetsov
2022-01-13 14:41                                   ` Maxim Levitsky
2022-01-13 14:59                                     ` Vitaly Kuznetsov
2022-01-13 16:26                                       ` Sean Christopherson
2022-01-13 16:30                                         ` Maxim Levitsky
2022-01-13 22:33             ` Sean Christopherson
2022-01-14  8:28               ` Maxim Levitsky
2022-01-14 16:08                 ` Sean Christopherson
2022-01-14  8:55               ` Igor Mammedov
2022-01-14  9:31                 ` Vitaly Kuznetsov
2022-01-14 11:22                   ` Igor Mammedov
2022-01-14 12:25                     ` Vitaly Kuznetsov
2022-01-14 17:00                       ` Sean Christopherson
2022-01-17  9:55                         ` Vitaly Kuznetsov
2022-01-17 11:20                           ` Paolo Bonzini
2022-01-17 13:02                             ` Vitaly Kuznetsov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.