All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
@ 2021-10-28 21:34 Sean Christopherson
  2021-10-29 12:56 ` Vitaly Kuznetsov
  2021-10-31 19:05   ` kernel test robot
  0 siblings, 2 replies; 8+ messages in thread
From: Sean Christopherson @ 2021-10-28 21:34 UTC (permalink / raw)
  To: Paolo Bonzini, Nathan Chancellor, Nick Desaulniers
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, llvm, linux-kernel, Ajay Garg

Move the vp_bitmap "allocation" that's need to handle mismatched vp_index
values down into sparse_set_to_vcpu_mask() and drop __always_inline from
said helper.  The vp_bitmap mess is a detail that's specific to the sparse
translation and does not need to be exposed to the caller.

The underlying motivation is to fudge around a compilation warning/error
when CONFIG_KASAN_STACK=y, which is selected (and can't be unselected) by
CONFIG_KASAN=y when compiling with gcc (clang/LLVM is a stack hog in some
cases so it's opt-in for clang).  KASAN_STACK adds a redzone around every
stack variable, which pushes the Hyper-V functions over the default limit
of 1024.  With CONFIG_KVM_WERROR=y, this breaks the build.  Shuffling which
function is charged with vp_bitmap gets all functions below the default
limit.

Regarding the __always_inline, prior to commit f21dd494506a ("KVM: x86:
hyperv: optimize sparse VP set processing") the helper, then named
hv_vcpu_in_sparse_set(), was a tiny bit of code that effectively boiled
down to a handful of bit ops.  The __always_inline was understandable, if
not justifiable.  Since the aforementioned change, sparse_set_to_vcpu_mask()
is a chunky 350-450+ bytes of code without KASAN=y, and balloons to 1100+
with KASAN=y.  In other words, it has no business being forcefully inlined.

Reported-by: Ajay Garg <ajaygargnsit@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---

Vitaly (and anyone with extensive KVM + Hyper-V knowledge), it would be
really helpful to get better coverage in kvm-unit-tests.  There's a smoke
test for this in selftests, but it's not really all that interesting.  It
took me over an hour and a half just to get a Linux guest to hit the
relevant flows.  Most of that was due to QEMU 5.1 bugs (doesn't advertise
HYPERCALL MSR by default) and Linux guest stupidity (silently disables
itself if said MSR isn't available), but it was really annoying to have to
go digging through QEMU to figure out how to even enable features that are
extensive/critical enough to warrant their own tests.

/wave to the clang folks for the pattern patch on the changelog ;-)

 arch/x86/kvm/hyperv.c | 55 ++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f15c0165c05..80018cfab5c7 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1710,31 +1710,36 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 		return kvm_hv_get_msr(vcpu, msr, pdata, host);
 }
 
-static __always_inline unsigned long *sparse_set_to_vcpu_mask(
-	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
-	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
+static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
+				    u64 valid_bank_mask, unsigned long *vcpu_mask)
 {
 	struct kvm_hv *hv = to_kvm_hv(kvm);
+	bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
+	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	struct kvm_vcpu *vcpu;
 	int i, bank, sbank = 0;
+	u64 *bitmap;
 
-	memset(vp_bitmap, 0,
-	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+	BUILD_BUG_ON(sizeof(vp_bitmap) >
+		     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));
+
+	/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
+	if (likely(!has_mismatch))
+		bitmap = (u64 *)vcpu_mask;
+
+	memset(bitmap, 0, sizeof(vp_bitmap));
 	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
 			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
-		vp_bitmap[bank] = sparse_banks[sbank++];
+		bitmap[bank] = sparse_banks[sbank++];
 
-	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
-		/* for all vcpus vp_index == vcpu_idx */
-		return (unsigned long *)vp_bitmap;
-	}
+	if (likely(!has_mismatch))
+		return;
 
-	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+	bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
-			__set_bit(i, vcpu_bitmap);
+			__set_bit(i, vcpu_mask);
 	}
-	return vcpu_bitmap;
 }
 
 struct kvm_hv_hcall {
@@ -1756,9 +1761,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
-	unsigned long *vcpu_mask;
+	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 	u64 valid_bank_mask;
 	u64 sparse_banks[64];
 	int sparse_banks_len;
@@ -1842,11 +1845,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	if (all_cpus) {
 		kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
 	} else {
-		vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
-						    vp_bitmap, vcpu_bitmap);
+		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
-		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
-					    vcpu_mask);
+		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, vcpu_mask);
 	}
 
 ret_success:
@@ -1879,9 +1880,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
-	unsigned long *vcpu_mask;
+	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 	unsigned long valid_bank_mask;
 	u64 sparse_banks[64];
 	int sparse_banks_len;
@@ -1937,11 +1936,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
-	vcpu_mask = all_cpus ? NULL :
-		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
-					vp_bitmap, vcpu_bitmap);
+	if (all_cpus) {
+		kvm_send_ipi_to_many(kvm, vector, NULL);
+	} else {
+		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
-	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
+		kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
+	}
 
 ret_success:
 	return HV_STATUS_SUCCESS;
-- 
2.33.0.1079.g6e70778dc9-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
  2021-10-28 21:34 [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask() Sean Christopherson
@ 2021-10-29 12:56 ` Vitaly Kuznetsov
  2021-10-29 14:32   ` Sean Christopherson
  2021-10-31 19:05   ` kernel test robot
  1 sibling, 1 reply; 8+ messages in thread
From: Vitaly Kuznetsov @ 2021-10-29 12:56 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Wanpeng Li, Jim Mattson, Joerg Roedel, kvm, llvm, linux-kernel,
	Ajay Garg, Paolo Bonzini, Nathan Chancellor, Nick Desaulniers

Sean Christopherson <seanjc@google.com> writes:

> Move the vp_bitmap "allocation" that's need to handle mismatched vp_index
> values down into sparse_set_to_vcpu_mask() and drop __always_inline from
> said helper.  The vp_bitmap mess is a detail that's specific to the sparse
> translation and does not need to be exposed to the caller.
>
> The underlying motivation is to fudge around a compilation warning/error
> when CONFIG_KASAN_STACK=y, which is selected (and can't be unselected) by
> CONFIG_KASAN=y when compiling with gcc (clang/LLVM is a stack hog in some
> cases so it's opt-in for clang).  KASAN_STACK adds a redzone around every
> stack variable, which pushes the Hyper-V functions over the default limit
> of 1024.  With CONFIG_KVM_WERROR=y, this breaks the build.  Shuffling which
> function is charged with vp_bitmap gets all functions below the default
> limit.
>
> Regarding the __always_inline, prior to commit f21dd494506a ("KVM: x86:
> hyperv: optimize sparse VP set processing") the helper, then named
> hv_vcpu_in_sparse_set(), was a tiny bit of code that effectively boiled
> down to a handful of bit ops.  The __always_inline was understandable, if
> not justifiable.  Since the aforementioned change, sparse_set_to_vcpu_mask()
> is a chunky 350-450+ bytes of code without KASAN=y, and balloons to 1100+
> with KASAN=y.  In other words, it has no business being forcefully inlined.
>
> Reported-by: Ajay Garg <ajaygargnsit@gmail.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>
> Vitaly (and anyone with extensive KVM + Hyper-V knowledge), it would be
> really helpful to get better coverage in kvm-unit-tests.

I can't agree more. This *is* in my backlog but unfortunately I can't
give any forcast on when I'll get to it :-(

>  There's a smoke
> test for this in selftests, but it's not really all that interesting.  It
> took me over an hour and a half just to get a Linux guest to hit the
> relevant flows.  Most of that was due to QEMU 5.1 bugs (doesn't advertise
> HYPERCALL MSR by default)

This should be fixed already, right?

>  and Linux guest stupidity (silently disables
> itself if said MSR isn't available), but it was really annoying to have to
> go digging through QEMU to figure out how to even enable features that are
> extensive/critical enough to warrant their own tests.
>
> /wave to the clang folks for the pattern patch on the changelog ;-)
>
>  arch/x86/kvm/hyperv.c | 55 ++++++++++++++++++++++---------------------
>  1 file changed, 28 insertions(+), 27 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 4f15c0165c05..80018cfab5c7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1710,31 +1710,36 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
>  		return kvm_hv_get_msr(vcpu, msr, pdata, host);
>  }
>  
> -static __always_inline unsigned long *sparse_set_to_vcpu_mask(
> -	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
> -	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
> +static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
> +				    u64 valid_bank_mask, unsigned long *vcpu_mask)
>  {
>  	struct kvm_hv *hv = to_kvm_hv(kvm);
> +	bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
> +	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
>  	struct kvm_vcpu *vcpu;
>  	int i, bank, sbank = 0;
> +	u64 *bitmap;
>  
> -	memset(vp_bitmap, 0,
> -	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
> +	BUILD_BUG_ON(sizeof(vp_bitmap) >
> +		     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));
> +
> +	/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
> +	if (likely(!has_mismatch))
> +		bitmap = (u64 *)vcpu_mask;
> +
> +	memset(bitmap, 0, sizeof(vp_bitmap));

... but in the unlikely case has_mismatch == true 'bitmap' is still
uninitialized here, right? How doesn't it crash?

>  	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
>  			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
> -		vp_bitmap[bank] = sparse_banks[sbank++];
> +		bitmap[bank] = sparse_banks[sbank++];
>  
> -	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
> -		/* for all vcpus vp_index == vcpu_idx */
> -		return (unsigned long *)vp_bitmap;
> -	}
> +	if (likely(!has_mismatch))
> +		return;
>  
> -	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
> +	bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
>  	kvm_for_each_vcpu(i, vcpu, kvm) {
>  		if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))

'vp_bitmap' also doesn't seem to be assigned to anything, I'm really
confused :-(

Didn't you accidentally mix up 'vp_bitmap' and 'bitmap'?

> -			__set_bit(i, vcpu_bitmap);
> +			__set_bit(i, vcpu_mask);
>  	}
> -	return vcpu_bitmap;
>  }
>  
>  struct kvm_hv_hcall {
> @@ -1756,9 +1761,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>  	struct kvm *kvm = vcpu->kvm;
>  	struct hv_tlb_flush_ex flush_ex;
>  	struct hv_tlb_flush flush;
> -	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
> -	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
> -	unsigned long *vcpu_mask;
> +	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
>  	u64 valid_bank_mask;
>  	u64 sparse_banks[64];
>  	int sparse_banks_len;
> @@ -1842,11 +1845,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>  	if (all_cpus) {
>  		kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
>  	} else {
> -		vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
> -						    vp_bitmap, vcpu_bitmap);
> +		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
>  
> -		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
> -					    vcpu_mask);
> +		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, vcpu_mask);

We're not bound by 80-char limit anymore, are we? :-)

>  	}
>  
>  ret_success:
> @@ -1879,9 +1880,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>  	struct kvm *kvm = vcpu->kvm;
>  	struct hv_send_ipi_ex send_ipi_ex;
>  	struct hv_send_ipi send_ipi;
> -	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
> -	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
> -	unsigned long *vcpu_mask;
> +	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
>  	unsigned long valid_bank_mask;
>  	u64 sparse_banks[64];
>  	int sparse_banks_len;
> @@ -1937,11 +1936,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
>  	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
>  		return HV_STATUS_INVALID_HYPERCALL_INPUT;
>  
> -	vcpu_mask = all_cpus ? NULL :
> -		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
> -					vp_bitmap, vcpu_bitmap);
> +	if (all_cpus) {
> +		kvm_send_ipi_to_many(kvm, vector, NULL);
> +	} else {
> +		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
>  
> -	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
> +		kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
> +	}
>  
>  ret_success:
>  	return HV_STATUS_SUCCESS;

-- 
Vitaly


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
  2021-10-29 12:56 ` Vitaly Kuznetsov
@ 2021-10-29 14:32   ` Sean Christopherson
  2021-10-29 19:06     ` Sean Christopherson
  0 siblings, 1 reply; 8+ messages in thread
From: Sean Christopherson @ 2021-10-29 14:32 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Wanpeng Li, Jim Mattson, Joerg Roedel, kvm, llvm, linux-kernel,
	Ajay Garg, Paolo Bonzini, Nathan Chancellor, Nick Desaulniers

On Fri, Oct 29, 2021, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> >  There's a smoke test for this in selftests, but it's not really all that
> >  interesting.  It took me over an hour and a half just to get a Linux guest
> >  to hit the relevant flows.  Most of that was due to QEMU 5.1 bugs (doesn't
> >  advertise HYPERCALL MSR by default)
> 
> This should be fixed already, right?

Yeah, it's fixed in more recent versions.  That added to the confusion; the local
copy of QEMU source I was reading didn't match the binary I was using.  Doh.

> > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> > index 4f15c0165c05..80018cfab5c7 100644
> > --- a/arch/x86/kvm/hyperv.c
> > +++ b/arch/x86/kvm/hyperv.c
> > @@ -1710,31 +1710,36 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
> >  		return kvm_hv_get_msr(vcpu, msr, pdata, host);
> >  }
> >  
> > -static __always_inline unsigned long *sparse_set_to_vcpu_mask(
> > -	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
> > -	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
> > +static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
> > +				    u64 valid_bank_mask, unsigned long *vcpu_mask)
> >  {
> >  	struct kvm_hv *hv = to_kvm_hv(kvm);
> > +	bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
> > +	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
> >  	struct kvm_vcpu *vcpu;
> >  	int i, bank, sbank = 0;
> > +	u64 *bitmap;
> >  
> > -	memset(vp_bitmap, 0,
> > -	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
> > +	BUILD_BUG_ON(sizeof(vp_bitmap) >
> > +		     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));
> > +
> > +	/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
> > +	if (likely(!has_mismatch))
> > +		bitmap = (u64 *)vcpu_mask;
> > +
> > +	memset(bitmap, 0, sizeof(vp_bitmap));
> 
> ... but in the unlikely case has_mismatch == true 'bitmap' is still
> uninitialized here, right? How doesn't it crash?

I'm sure it does crash.  I'll hack the guest to actually test this.  More below.
 
> >  	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
> >  			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
> > -		vp_bitmap[bank] = sparse_banks[sbank++];
> > +		bitmap[bank] = sparse_banks[sbank++];
> >  
> > -	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
> > -		/* for all vcpus vp_index == vcpu_idx */
> > -		return (unsigned long *)vp_bitmap;
> > -	}
> > +	if (likely(!has_mismatch))
> > +		return;
> >  
> > -	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
> > +	bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
> >  	kvm_for_each_vcpu(i, vcpu, kvm) {
> >  		if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
> 
> 'vp_bitmap' also doesn't seem to be assigned to anything, I'm really
> confused :-(
>
> Didn't you accidentally mix up 'vp_bitmap' and 'bitmap'?

No, bitmap was supposed to be initialized as:

	if (likely(!has_mismatch))
		bitmap = (u64 *)vcpu_mask;
	else
		bitmap = vp_bitmap;

The idea being that the !mismatch case sets vcpu_mask directly, and the mismatch
case sets vp_bitmap and then uses that to fill vcpu_mask.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
  2021-10-29 14:32   ` Sean Christopherson
@ 2021-10-29 19:06     ` Sean Christopherson
  2021-10-29 19:26       ` Sean Christopherson
  0 siblings, 1 reply; 8+ messages in thread
From: Sean Christopherson @ 2021-10-29 19:06 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Wanpeng Li, Jim Mattson, Joerg Roedel, kvm, llvm, linux-kernel,
	Ajay Garg, Paolo Bonzini, Nathan Chancellor, Nick Desaulniers

On Fri, Oct 29, 2021, Sean Christopherson wrote:
> On Fri, Oct 29, 2021, Vitaly Kuznetsov wrote:
> > > +	/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
> > > +	if (likely(!has_mismatch))
> > > +		bitmap = (u64 *)vcpu_mask;
> > > +
> > > +	memset(bitmap, 0, sizeof(vp_bitmap));
> > 
> > ... but in the unlikely case has_mismatch == true 'bitmap' is still
> > uninitialized here, right? How doesn't it crash?
> 
> I'm sure it does crash.  I'll hack the guest to actually test this.

Crash confirmed.  But I don't feel too bad about my one-line goof because the
existing code botches sparse VP_SET, i.e. _EX flows.  The spec requires the guest
to explicit specify the number of QWORDS in the variable header[*], e.g. VP_SET
in this case, but KVM ignores that and does a harebrained calculation to "count"
the number of sparse banks.  It does this by counting the number of bits set in
valid_bank_mask, which is comically broken because (a) the whole "sparse" thing
should be a clue that they banks are not packed together, (b) the spec clearly
states that "bank = VPindex / 64", (c) the sparse_bank madness makes this waaaay
more complicated than it needs to be, and (d) the massive sparse_bank allocation
on the stack is completely unnecessary because KVM simply ignores everything that
wouldn't fit in vp_bitmap.

To reproduce, stuff vp_index in descending order starting from KVM_MAX_VCPUS - 1.

	hv_vcpu->vp_index = KVM_MAX_VCPUS - vcpu->vcpu_idx - 1;

E.g. with an 8 vCPU guest, KVM will calculate sparse_banks_len=1, read zeros, and
do nothing, hanging the guest because it never sends IPIs.

So v2 will be completely different because the "fix" for the KASAN issue is to
get rid of sparse_banks entirely.

[1] https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface#variable-sized-hypercall-input-headers
[2] https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/datatypes/hv_vp_set#sparse-virtual-processor-set

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
  2021-10-29 19:06     ` Sean Christopherson
@ 2021-10-29 19:26       ` Sean Christopherson
  2021-10-29 19:42         ` Sean Christopherson
  0 siblings, 1 reply; 8+ messages in thread
From: Sean Christopherson @ 2021-10-29 19:26 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Wanpeng Li, Jim Mattson, Joerg Roedel, kvm, llvm, linux-kernel,
	Ajay Garg, Paolo Bonzini, Nathan Chancellor, Nick Desaulniers

On Fri, Oct 29, 2021, Sean Christopherson wrote:
> On Fri, Oct 29, 2021, Sean Christopherson wrote:
> > On Fri, Oct 29, 2021, Vitaly Kuznetsov wrote:
> > > > +	/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
> > > > +	if (likely(!has_mismatch))
> > > > +		bitmap = (u64 *)vcpu_mask;
> > > > +
> > > > +	memset(bitmap, 0, sizeof(vp_bitmap));
> > > 
> > > ... but in the unlikely case has_mismatch == true 'bitmap' is still
> > > uninitialized here, right? How doesn't it crash?
> > 
> > I'm sure it does crash.  I'll hack the guest to actually test this.
> 
> Crash confirmed.  But I don't feel too bad about my one-line goof because the
> existing code botches sparse VP_SET, i.e. _EX flows.  The spec requires the guest
> to explicit specify the number of QWORDS in the variable header[*], e.g. VP_SET
> in this case, but KVM ignores that and does a harebrained calculation to "count"
> the number of sparse banks.  It does this by counting the number of bits set in
> valid_bank_mask, which is comically broken because (a) the whole "sparse" thing
> should be a clue that they banks are not packed together, (b) the spec clearly
> states that "bank = VPindex / 64", (c) the sparse_bank madness makes this waaaay
> more complicated than it needs to be, and (d) the massive sparse_bank allocation
> on the stack is completely unnecessary because KVM simply ignores everything that
> wouldn't fit in vp_bitmap.
> 
> To reproduce, stuff vp_index in descending order starting from KVM_MAX_VCPUS - 1.
> 
> 	hv_vcpu->vp_index = KVM_MAX_VCPUS - vcpu->vcpu_idx - 1;
> 
> E.g. with an 8 vCPU guest, KVM will calculate sparse_banks_len=1, read zeros, and
> do nothing, hanging the guest because it never sends IPIs.
 
Ugh, I can't read.  The example[*] clarifies that the "sparse" VP_SET packs things
into BankContents.  I don't think I imagined my guest hanging though, so something
is awry.  Back to debugging...

[*] https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/datatypes/hv_vp_set#processor-set-example

> So v2 will be completely different because the "fix" for the KASAN issue is to
> get rid of sparse_banks entirely.
> 
> [1] https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface#variable-sized-hypercall-input-headers
> [2] https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/datatypes/hv_vp_set#sparse-virtual-processor-set

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
  2021-10-29 19:26       ` Sean Christopherson
@ 2021-10-29 19:42         ` Sean Christopherson
  0 siblings, 0 replies; 8+ messages in thread
From: Sean Christopherson @ 2021-10-29 19:42 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Wanpeng Li, Jim Mattson, Joerg Roedel, kvm, llvm, linux-kernel,
	Ajay Garg, Paolo Bonzini, Nathan Chancellor, Nick Desaulniers

On Fri, Oct 29, 2021, Sean Christopherson wrote:
> On Fri, Oct 29, 2021, Sean Christopherson wrote:
> > On Fri, Oct 29, 2021, Sean Christopherson wrote:
> > > On Fri, Oct 29, 2021, Vitaly Kuznetsov wrote:
> > > > > +	/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
> > > > > +	if (likely(!has_mismatch))
> > > > > +		bitmap = (u64 *)vcpu_mask;
> > > > > +
> > > > > +	memset(bitmap, 0, sizeof(vp_bitmap));
> > > > 
> > > > ... but in the unlikely case has_mismatch == true 'bitmap' is still
> > > > uninitialized here, right? How doesn't it crash?
> > > 
> > > I'm sure it does crash.  I'll hack the guest to actually test this.
> > 
> > Crash confirmed.  But I don't feel too bad about my one-line goof because the
> > existing code botches sparse VP_SET, i.e. _EX flows.  The spec requires the guest
> > to explicit specify the number of QWORDS in the variable header[*], e.g. VP_SET
> > in this case, but KVM ignores that and does a harebrained calculation to "count"
> > the number of sparse banks.  It does this by counting the number of bits set in
> > valid_bank_mask, which is comically broken because (a) the whole "sparse" thing
> > should be a clue that they banks are not packed together, (b) the spec clearly
> > states that "bank = VPindex / 64", (c) the sparse_bank madness makes this waaaay
> > more complicated than it needs to be, and (d) the massive sparse_bank allocation
> > on the stack is completely unnecessary because KVM simply ignores everything that
> > wouldn't fit in vp_bitmap.
> > 
> > To reproduce, stuff vp_index in descending order starting from KVM_MAX_VCPUS - 1.
> > 
> > 	hv_vcpu->vp_index = KVM_MAX_VCPUS - vcpu->vcpu_idx - 1;
> > 
> > E.g. with an 8 vCPU guest, KVM will calculate sparse_banks_len=1, read zeros, and
> > do nothing, hanging the guest because it never sends IPIs.
>  
> Ugh, I can't read.  The example[*] clarifies that the "sparse" VP_SET packs things
> into BankContents.  I don't think I imagined my guest hanging though, so something
> is awry.  Back to debugging...

Found the culprit.  When __send_ipi_mask_ex() (in the guest) sees that the target
set is all present CPUs, it skips setting the sparse VP_SET and goes straight to
HV_GENERIC_SET_ALL, but still issues the _EX versions.  KVM mishandles that case
by skipping the IPIs altogether when there's no sparse banks.  The spec says that
it's legal for there to be no sparse banks if the data is not needed, which is
the case here since the format is not sparse.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
  2021-10-28 21:34 [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask() Sean Christopherson
@ 2021-10-31 19:05   ` kernel test robot
  2021-10-31 19:05   ` kernel test robot
  1 sibling, 0 replies; 8+ messages in thread
From: kernel test robot @ 2021-10-31 19:05 UTC (permalink / raw)
  To: Sean Christopherson, Paolo Bonzini, Nathan Chancellor, Nick Desaulniers
  Cc: llvm, kbuild-all, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, kvm

[-- Attachment #1: Type: text/plain, Size: 3865 bytes --]

Hi Sean,

I love your patch! Perhaps something to improve:

[auto build test WARNING on kvm/queue]
[also build test WARNING on next-20211029]
[cannot apply to v5.15-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sean-Christopherson/KVM-x86-Shove-vp_bitmap-handling-down-into-sparse_set_to_vcpu_mask/20211029-053606
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
config: x86_64-randconfig-a015-20211031 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 82ed106567063ea269c6d5669278b733e173a42f)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/1a2811bcda8f6cc49c4458744e96bdbabc3a38b0
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sean-Christopherson/KVM-x86-Shove-vp_bitmap-handling-down-into-sparse_set_to_vcpu_mask/20211029-053606
        git checkout 1a2811bcda8f6cc49c4458744e96bdbabc3a38b0
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> arch/x86/kvm/hyperv.c:1727:6: warning: variable 'bitmap' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
           if (likely(!has_mismatch))
               ^~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:45:21: note: expanded from macro 'likely'
   #  define likely(x)     (__branch_check__(x, 1, __builtin_constant_p(x)))
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/hyperv.c:1730:9: note: uninitialized use occurs here
           memset(bitmap, 0, sizeof(vp_bitmap));
                  ^~~~~~
   arch/x86/kvm/hyperv.c:1727:2: note: remove the 'if' if its condition is always true
           if (likely(!has_mismatch))
           ^~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/hyperv.c:1721:13: note: initialize the variable 'bitmap' to silence this warning
           u64 *bitmap;
                      ^
                       = NULL
   1 warning generated.


vim +1727 arch/x86/kvm/hyperv.c

  1712	
  1713	static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
  1714					    u64 valid_bank_mask, unsigned long *vcpu_mask)
  1715	{
  1716		struct kvm_hv *hv = to_kvm_hv(kvm);
  1717		bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
  1718		u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
  1719		struct kvm_vcpu *vcpu;
  1720		int i, bank, sbank = 0;
  1721		u64 *bitmap;
  1722	
  1723		BUILD_BUG_ON(sizeof(vp_bitmap) >
  1724			     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));
  1725	
  1726		/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
> 1727		if (likely(!has_mismatch))
  1728			bitmap = (u64 *)vcpu_mask;
  1729	
  1730		memset(bitmap, 0, sizeof(vp_bitmap));
  1731		for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
  1732				 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
  1733			bitmap[bank] = sparse_banks[sbank++];
  1734	
  1735		if (likely(!has_mismatch))
  1736			return;
  1737	
  1738		bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
  1739		kvm_for_each_vcpu(i, vcpu, kvm) {
  1740			if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
  1741				__set_bit(i, vcpu_mask);
  1742		}
  1743	}
  1744	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 38981 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask()
@ 2021-10-31 19:05   ` kernel test robot
  0 siblings, 0 replies; 8+ messages in thread
From: kernel test robot @ 2021-10-31 19:05 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3955 bytes --]

Hi Sean,

I love your patch! Perhaps something to improve:

[auto build test WARNING on kvm/queue]
[also build test WARNING on next-20211029]
[cannot apply to v5.15-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sean-Christopherson/KVM-x86-Shove-vp_bitmap-handling-down-into-sparse_set_to_vcpu_mask/20211029-053606
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
config: x86_64-randconfig-a015-20211031 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 82ed106567063ea269c6d5669278b733e173a42f)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/1a2811bcda8f6cc49c4458744e96bdbabc3a38b0
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sean-Christopherson/KVM-x86-Shove-vp_bitmap-handling-down-into-sparse_set_to_vcpu_mask/20211029-053606
        git checkout 1a2811bcda8f6cc49c4458744e96bdbabc3a38b0
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> arch/x86/kvm/hyperv.c:1727:6: warning: variable 'bitmap' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
           if (likely(!has_mismatch))
               ^~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:45:21: note: expanded from macro 'likely'
   #  define likely(x)     (__branch_check__(x, 1, __builtin_constant_p(x)))
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/hyperv.c:1730:9: note: uninitialized use occurs here
           memset(bitmap, 0, sizeof(vp_bitmap));
                  ^~~~~~
   arch/x86/kvm/hyperv.c:1727:2: note: remove the 'if' if its condition is always true
           if (likely(!has_mismatch))
           ^~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/hyperv.c:1721:13: note: initialize the variable 'bitmap' to silence this warning
           u64 *bitmap;
                      ^
                       = NULL
   1 warning generated.


vim +1727 arch/x86/kvm/hyperv.c

  1712	
  1713	static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
  1714					    u64 valid_bank_mask, unsigned long *vcpu_mask)
  1715	{
  1716		struct kvm_hv *hv = to_kvm_hv(kvm);
  1717		bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
  1718		u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
  1719		struct kvm_vcpu *vcpu;
  1720		int i, bank, sbank = 0;
  1721		u64 *bitmap;
  1722	
  1723		BUILD_BUG_ON(sizeof(vp_bitmap) >
  1724			     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));
  1725	
  1726		/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
> 1727		if (likely(!has_mismatch))
  1728			bitmap = (u64 *)vcpu_mask;
  1729	
  1730		memset(bitmap, 0, sizeof(vp_bitmap));
  1731		for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
  1732				 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
  1733			bitmap[bank] = sparse_banks[sbank++];
  1734	
  1735		if (likely(!has_mismatch))
  1736			return;
  1737	
  1738		bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
  1739		kvm_for_each_vcpu(i, vcpu, kvm) {
  1740			if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
  1741				__set_bit(i, vcpu_mask);
  1742		}
  1743	}
  1744	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 38981 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-10-31 19:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-28 21:34 [PATCH] KVM: x86: Shove vp_bitmap handling down into sparse_set_to_vcpu_mask() Sean Christopherson
2021-10-29 12:56 ` Vitaly Kuznetsov
2021-10-29 14:32   ` Sean Christopherson
2021-10-29 19:06     ` Sean Christopherson
2021-10-29 19:26       ` Sean Christopherson
2021-10-29 19:42         ` Sean Christopherson
2021-10-31 19:05 ` kernel test robot
2021-10-31 19:05   ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.