linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Roman Kagan <rkagan@virtuozzo.com>
To: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: kvm@vger.kernel.org, "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"K. Y. Srinivasan" <kys@microsoft.com>,
	"Haiyang Zhang" <haiyangz@microsoft.com>,
	"Stephen Hemminger" <sthemmin@microsoft.com>,
	"Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com>,
	"Mohammed Gamal" <mmorsy@redhat.com>,
	"Cathy Avery" <cavery@redhat.com>,
	"Wanpeng Li" <wanpeng.li@hotmail.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v6 6/7] KVM: x86: hyperv: optimize kvm_hv_flush_tlb() for vp_index == vcpu_idx case
Date: Thu, 27 Sep 2018 12:42:15 +0300	[thread overview]
Message-ID: <20180927094214.GD4186@rkaganb.sw.ru> (raw)
In-Reply-To: <20180926170259.29796-7-vkuznets@redhat.com>

On Wed, Sep 26, 2018 at 07:02:58PM +0200, Vitaly Kuznetsov wrote:
> VP index almost always matches VCPU and when it does it's faster to walk
> the sparse set instead of all vcpus.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/kvm/hyperv.c | 96 +++++++++++++++++++++++--------------------
>  1 file changed, 52 insertions(+), 44 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index eeb12eacd525..cc0535a078f7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1277,32 +1277,37 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
>  		return kvm_hv_get_msr(vcpu, msr, pdata, host);
>  }
>  
> -static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
> +static __always_inline bool hv_vcpu_in_sparse_set(struct kvm_vcpu_hv *hv_vcpu,
> +						  u64 sparse_banks[],
> +						  u64 valid_bank_mask)
>  {
> -	int i = 0, j;
> +	int bank = hv_vcpu->vp_index / 64, sbank;
>  
> -	if (!(valid_bank_mask & BIT_ULL(bank_no)))
> -		return -1;
> +	if (bank >= 64)
> +		return false;
>  
> -	for (j = 0; j < bank_no; j++)
> -		if (valid_bank_mask & BIT_ULL(j))
> -			i++;
> +	if (!(valid_bank_mask & BIT_ULL(bank)))
> +		return false;
>  
> -	return i;
> +	/* Sparse bank number equals to the number of set bits before it */
> +	sbank = bitmap_weight((unsigned long *)&valid_bank_mask, bank);
> +
> +	return !!(sparse_banks[sbank] & BIT_ULL(hv_vcpu->vp_index % 64));
>  }
>  
>  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
>  			    u16 rep_cnt, bool ex)
>  {
>  	struct kvm *kvm = current_vcpu->kvm;
> -	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
>  	struct hv_tlb_flush_ex flush_ex;
>  	struct hv_tlb_flush flush;
>  	struct kvm_vcpu *vcpu;
>  	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
> -	u64 valid_bank_mask = 0;
> +	u64 valid_bank_mask;
>  	u64 sparse_banks[64];
> -	int sparse_banks_len, i;
> +	int sparse_banks_len, i, bank, sbank;
>  	bool all_cpus;
>  
>  	if (!ex) {
> @@ -1312,6 +1317,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
>  		trace_kvm_hv_flush_tlb(flush.processor_mask,
>  				       flush.address_space, flush.flags);
>  
> +		valid_bank_mask = BIT_ULL(0);
>  		sparse_banks[0] = flush.processor_mask;
>  		all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
>  	} else {
> @@ -1344,52 +1350,54 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
>  			return HV_STATUS_INVALID_HYPERCALL_INPUT;
>  	}
>  
> -	cpumask_clear(&hv_current->tlb_lush);
> +	/*
> +	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
> +	 * analyze it here, flush TLB regardless of the specified address space.
> +	 */
> +	cpumask_clear(&hv_vcpu->tlb_lush);

Maybe squash this hv_current -> hv_vcpu renaming into patch 3?
(And yes this "lush" is funny, too ;)

>  
>  	if (all_cpus) {
>  		kvm_make_vcpus_request_mask(kvm,
>  				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
> -				    NULL, &hv_current->tlb_lush);
> +				    NULL, &hv_vcpu->tlb_lush);
>  		goto ret_success;
>  	}
>  
> -	kvm_for_each_vcpu(i, vcpu, kvm) {
> -		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
> -		int bank = hv->vp_index / 64, sbank = 0;
> -
> -		/* Banks >64 can't be represented */
> -		if (bank >= 64)
> -			continue;
> -
> -		/* Non-ex hypercalls can only address first 64 vCPUs */
> -		if (!ex && bank)
> -			continue;
> -
> -		if (ex) {
> -			/*
> -			 * Check is the bank of this vCPU is in sparse
> -			 * set and get the sparse bank number.
> -			 */
> -			sbank = get_sparse_bank_no(valid_bank_mask, bank);
> -
> -			if (sbank < 0)
> -				continue;
> +	if (atomic_read(&hv->num_mismatched_vp_indexes)) {
> +		kvm_for_each_vcpu(i, vcpu, kvm) {
> +			if (hv_vcpu_in_sparse_set(&vcpu->arch.hyperv,
> +						  sparse_banks,
> +						  valid_bank_mask))
> +				__set_bit(i, vcpu_bitmap);
>  		}
> +		goto flush_request;
> +	}
>  
> -		if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
> -			continue;
> -
> -		/*
> -		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
> -		 * can't analyze it here, flush TLB regardless of the specified
> -		 * address space.
> -		 */
> -		__set_bit(i, vcpu_bitmap);
> +	/*
> +	 * num_mismatched_vp_indexes is zero so every vcpu has
> +	 * vp_index == vcpu_idx.
> +	 */
> +	sbank = 0;
> +	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
> +			 BITS_PER_LONG) {

s/BITS_PER_LONG/64/

> +		for_each_set_bit(i,
> +				 (unsigned long *)&sparse_banks[sbank],
> +				 BITS_PER_LONG) {

ditto

> +			u32 vp_index = bank * 64 + i;
> +
> +			/* A non-existent vCPU was specified */
> +			if (vp_index >= KVM_MAX_VCPUS)
> +				return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +
> +			__set_bit(vp_index, vcpu_bitmap);
> +		}
> +		sbank++;
>  	}

I wonder if copying the bank as a whole would make it easier to follow
(and somewhat more efficient):

	sbank = 0;
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64)
		((u64 *)vcpu_bitmap)[bank] = sparse_banks[sbank++];

Also it seems equally efficient but slightly easier to read if
vcpu_bitmap is filled first regardless of ->num_mismatched_vp_indexes,
and then either passed directly to kvm_make_vcpus_request_mask if
num_mismatched_vp_indexes == 0, or converted into the real vcpu mask
using the regular test_bit otherwise.

So eventually it all would look like

	...
	u64 vp_bitmap[KVM_MAX_VCPUS / 64] = {0};
  	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS) = {0};
	unsigned long *vcpu_mask;
	...
	if (all_cpus) {
		vcpu_mask = NULL;
		goto flush_request;
	}

	sbank = 0;
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64)
		vp_bitmap[bank] = sparse_banks[sbank++];

	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
		/* for all vcpus vp_index == vcpu_idx */
		vcpu_mask = vp_bitmap;
		goto flush_request;
	}

	kvm_for_each_vcpu(i, vcpu, kvm)
		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index, vp_bitmap))
			__set_bit(i, vcpu_bitmap);
	vcpu_mask = vcpu_bitmap;
	...

>  
> +flush_request:
>  	kvm_make_vcpus_request_mask(kvm,
>  				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
> -				    vcpu_bitmap, &hv_current->tlb_lush);
> +				    vcpu_bitmap, &hv_vcpu->tlb_lush);
>  
>  ret_success:
>  	/* We always do full TLB flush, set rep_done = rep_cnt. */

Roman.

  reply	other threads:[~2018-09-27  9:42 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-26 17:02 [PATCH v6 0/7] KVM: x86: hyperv: PV IPI support for Windows guests Vitaly Kuznetsov
2018-09-26 17:02 ` [PATCH v6 1/7] KVM: x86: hyperv: enforce vp_index < KVM_MAX_VCPUS Vitaly Kuznetsov
2018-09-26 17:02 ` [PATCH v6 2/7] KVM: x86: hyperv: optimize 'all cpus' case in kvm_hv_flush_tlb() Vitaly Kuznetsov
2018-09-26 17:02 ` [PATCH v6 3/7] KVM: x86: hyperv: consistently use 'hv_vcpu' for 'struct kvm_vcpu_hv' variables Vitaly Kuznetsov
2018-09-27  7:49   ` Roman Kagan
2018-09-26 17:02 ` [PATCH v6 4/7] KVM: x86: hyperv: keep track of mismatched VP indexes Vitaly Kuznetsov
2018-09-27  7:59   ` Roman Kagan
2018-09-27  9:17     ` Vitaly Kuznetsov
2018-10-01 15:48       ` Paolo Bonzini
2018-10-01 15:54         ` Roman Kagan
2018-10-01 15:57           ` Roman Kagan
2018-09-26 17:02 ` [PATCH v6 5/7] KVM: x86: hyperv: valid_bank_mask should be 'u64' Vitaly Kuznetsov
2018-09-27  8:01   ` Roman Kagan
2018-09-26 17:02 ` [PATCH v6 6/7] KVM: x86: hyperv: optimize kvm_hv_flush_tlb() for vp_index == vcpu_idx case Vitaly Kuznetsov
2018-09-27  9:42   ` Roman Kagan [this message]
2018-09-26 17:02 ` [PATCH v6 7/7] KVM: x86: hyperv: implement PV IPI send hypercalls Vitaly Kuznetsov
2018-09-27 11:07   ` Roman Kagan
2018-10-01 16:01     ` Paolo Bonzini
2018-10-01 16:20       ` Vitaly Kuznetsov
2018-10-01 16:21         ` Paolo Bonzini
2018-10-01 16:41           ` Vitaly Kuznetsov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180927094214.GD4186@rkaganb.sw.ru \
    --to=rkagan@virtuozzo.com \
    --cc=Michael.H.Kelley@microsoft.com \
    --cc=cavery@redhat.com \
    --cc=haiyangz@microsoft.com \
    --cc=kvm@vger.kernel.org \
    --cc=kys@microsoft.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mmorsy@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=sthemmin@microsoft.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpeng.li@hotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).