From: Roman Kagan <rkagan@virtuozzo.com>
To: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: kvm@vger.kernel.org, "Paolo Bonzini" <pbonzini@redhat.com>,
"Radim Krčmář" <rkrcmar@redhat.com>,
"K. Y. Srinivasan" <kys@microsoft.com>,
"Haiyang Zhang" <haiyangz@microsoft.com>,
"Stephen Hemminger" <sthemmin@microsoft.com>,
"Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com>,
"Mohammed Gamal" <mmorsy@redhat.com>,
"Cathy Avery" <cavery@redhat.com>,
"Wanpeng Li" <wanpeng.li@hotmail.com>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v6 6/7] KVM: x86: hyperv: optimize kvm_hv_flush_tlb() for vp_index == vcpu_idx case
Date: Thu, 27 Sep 2018 12:42:15 +0300 [thread overview]
Message-ID: <20180927094214.GD4186@rkaganb.sw.ru> (raw)
In-Reply-To: <20180926170259.29796-7-vkuznets@redhat.com>
On Wed, Sep 26, 2018 at 07:02:58PM +0200, Vitaly Kuznetsov wrote:
> VP inedx almost always matches VCPU and when it does it's faster to walk
> the sparse set instead of all vcpus.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
> arch/x86/kvm/hyperv.c | 96 +++++++++++++++++++++++--------------------
> 1 file changed, 52 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index eeb12eacd525..cc0535a078f7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1277,32 +1277,37 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
> return kvm_hv_get_msr(vcpu, msr, pdata, host);
> }
>
> -static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
> +static __always_inline bool hv_vcpu_in_sparse_set(struct kvm_vcpu_hv *hv_vcpu,
> + u64 sparse_banks[],
> + u64 valid_bank_mask)
> {
> - int i = 0, j;
> + int bank = hv_vcpu->vp_index / 64, sbank;
>
> - if (!(valid_bank_mask & BIT_ULL(bank_no)))
> - return -1;
> + if (bank >= 64)
> + return false;
>
> - for (j = 0; j < bank_no; j++)
> - if (valid_bank_mask & BIT_ULL(j))
> - i++;
> + if (!(valid_bank_mask & BIT_ULL(bank)))
> + return false;
>
> - return i;
> + /* Sparse bank number equals to the number of set bits before it */
> + sbank = bitmap_weight((unsigned long *)&valid_bank_mask, bank);
> +
> + return !!(sparse_banks[sbank] & BIT_ULL(hv_vcpu->vp_index % 64));
> }
>
> static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
> u16 rep_cnt, bool ex)
> {
> struct kvm *kvm = current_vcpu->kvm;
> - struct kvm_vcpu_hv *hv_current = ¤t_vcpu->arch.hyperv;
> + struct kvm_hv *hv = &kvm->arch.hyperv;
> + struct kvm_vcpu_hv *hv_vcpu = ¤t_vcpu->arch.hyperv;
> struct hv_tlb_flush_ex flush_ex;
> struct hv_tlb_flush flush;
> struct kvm_vcpu *vcpu;
> unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
> - u64 valid_bank_mask = 0;
> + u64 valid_bank_mask;
> u64 sparse_banks[64];
> - int sparse_banks_len, i;
> + int sparse_banks_len, i, bank, sbank;
> bool all_cpus;
>
> if (!ex) {
> @@ -1312,6 +1317,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
> trace_kvm_hv_flush_tlb(flush.processor_mask,
> flush.address_space, flush.flags);
>
> + valid_bank_mask = BIT_ULL(0);
> sparse_banks[0] = flush.processor_mask;
> all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
> } else {
> @@ -1344,52 +1350,54 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
> return HV_STATUS_INVALID_HYPERCALL_INPUT;
> }
>
> - cpumask_clear(&hv_current->tlb_lush);
> + /*
> + * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
> + * analyze it here, flush TLB regardless of the specified address space.
> + */
> + cpumask_clear(&hv_vcpu->tlb_lush);
Maybe squash this hv_current -> hv_vcpu renaming into patch 3?
(And yes this "lush" is funny, too ;)
>
> if (all_cpus) {
> kvm_make_vcpus_request_mask(kvm,
> KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
> - NULL, &hv_current->tlb_lush);
> + NULL, &hv_vcpu->tlb_lush);
> goto ret_success;
> }
>
> - kvm_for_each_vcpu(i, vcpu, kvm) {
> - struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
> - int bank = hv->vp_index / 64, sbank = 0;
> -
> - /* Banks >64 can't be represented */
> - if (bank >= 64)
> - continue;
> -
> - /* Non-ex hypercalls can only address first 64 vCPUs */
> - if (!ex && bank)
> - continue;
> -
> - if (ex) {
> - /*
> - * Check is the bank of this vCPU is in sparse
> - * set and get the sparse bank number.
> - */
> - sbank = get_sparse_bank_no(valid_bank_mask, bank);
> -
> - if (sbank < 0)
> - continue;
> + if (atomic_read(&hv->num_mismatched_vp_indexes)) {
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (hv_vcpu_in_sparse_set(&vcpu->arch.hyperv,
> + sparse_banks,
> + valid_bank_mask))
> + __set_bit(i, vcpu_bitmap);
> }
> + goto flush_request;
> + }
>
> - if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
> - continue;
> -
> - /*
> - * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
> - * can't analyze it here, flush TLB regardless of the specified
> - * address space.
> - */
> - __set_bit(i, vcpu_bitmap);
> + /*
> + * num_mismatched_vp_indexes is zero so every vcpu has
> + * vp_index == vcpu_idx.
> + */
> + sbank = 0;
> + for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
> + BITS_PER_LONG) {
s/BITS_PER_LONG/64/
> + for_each_set_bit(i,
> + (unsigned long *)&sparse_banks[sbank],
> + BITS_PER_LONG) {
ditto
> + u32 vp_index = bank * 64 + i;
> +
> + /* A non-existent vCPU was specified */
> + if (vp_index >= KVM_MAX_VCPUS)
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +
> + __set_bit(vp_index, vcpu_bitmap);
> + }
> + sbank++;
> }
I wonder if copying the bank as a whole would make it easier to follow
(and somewhat more efficient):
sbank = 0;
for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64)
((u64)vcpu_bitmap)[bank] = sparse_banks[sbank++];
Also it seems equally efficient but slightly easier to read if
vcpu_bitmap is filled first regardless of ->num_mismatched_vp_indexes,
and then either passed directly to kvm_make_vcpus_request_mask if
num_mismatched_vp_indexes == 0, or converted into the real vcpu mask
using the regular test_bit otherwise.
So eventually it all would look like
...
u64 vp_bitmap[KVM_MAX_VCPUS / 64] = {0};
DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS) = {0};
unsigned long *vcpu_mask;
...
if (all_cpus) {
vcpu_mask = NULL;
goto flush_request;
}
sbank = 0;
for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64)
vp_bitmap[bank] = sparse_banks[sbank++];
if (likely(!atomic_read(&hv->num_mismatched_vp_indexes)) {
/* for all vcpus vp_index == vcpu_idx */
vcpu_mask = vp_bitmap;
goto flush_request;
}
kvm_for_each_vcpu(i, vcpu, kvm)
if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index, vp_bitmap))
__set_bit(i, vcpu_bitmap);
vcpu_mask = vcpu_bitmap;
...
>
> +flush_request:
> kvm_make_vcpus_request_mask(kvm,
> KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
> - vcpu_bitmap, &hv_current->tlb_lush);
> + vcpu_bitmap, &hv_vcpu->tlb_lush);
>
> ret_success:
> /* We always do full TLB flush, set rep_done = rep_cnt. */
Roman.
next prev parent reply other threads:[~2018-09-27 9:42 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-26 17:02 [PATCH v6 0/7] KVM: x86: hyperv: PV IPI support for Windows guests Vitaly Kuznetsov
2018-09-26 17:02 ` [PATCH v6 1/7] KVM: x86: hyperv: enforce vp_index < KVM_MAX_VCPUS Vitaly Kuznetsov
2018-09-26 17:02 ` [PATCH v6 2/7] KVM: x86: hyperv: optimize 'all cpus' case in kvm_hv_flush_tlb() Vitaly Kuznetsov
2018-09-26 17:02 ` [PATCH v6 3/7] KVM: x86: hyperv: consistently use 'hv_vcpu' for 'struct kvm_vcpu_hv' variables Vitaly Kuznetsov
2018-09-27 7:49 ` Roman Kagan
2018-09-26 17:02 ` [PATCH v6 4/7] KVM: x86: hyperv: keep track of mismatched VP indexes Vitaly Kuznetsov
2018-09-27 7:59 ` Roman Kagan
2018-09-27 9:17 ` Vitaly Kuznetsov
2018-10-01 15:48 ` Paolo Bonzini
2018-10-01 15:54 ` Roman Kagan
2018-10-01 15:57 ` Roman Kagan
2018-09-26 17:02 ` [PATCH v6 5/7] KVM: x86: hyperv: valid_bank_mask should be 'u64' Vitaly Kuznetsov
2018-09-27 8:01 ` Roman Kagan
2018-09-26 17:02 ` [PATCH v6 6/7] KVM: x86: hyperv: optimize kvm_hv_flush_tlb() for vp_index == vcpu_idx case Vitaly Kuznetsov
2018-09-27 9:42 ` Roman Kagan [this message]
2018-09-26 17:02 ` [PATCH v6 7/7] KVM: x86: hyperv: implement PV IPI send hypercalls Vitaly Kuznetsov
2018-09-27 11:07 ` Roman Kagan
2018-10-01 16:01 ` Paolo Bonzini
2018-10-01 16:20 ` Vitaly Kuznetsov
2018-10-01 16:21 ` Paolo Bonzini
2018-10-01 16:41 ` Vitaly Kuznetsov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180927094214.GD4186@rkaganb.sw.ru \
--to=rkagan@virtuozzo.com \
--cc=Michael.H.Kelley@microsoft.com \
--cc=cavery@redhat.com \
--cc=haiyangz@microsoft.com \
--cc=kvm@vger.kernel.org \
--cc=kys@microsoft.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mmorsy@redhat.com \
--cc=pbonzini@redhat.com \
--cc=rkrcmar@redhat.com \
--cc=sthemmin@microsoft.com \
--cc=vkuznets@redhat.com \
--cc=wanpeng.li@hotmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).