From: Like Xu <like.xu@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
Paolo Bonzini <pbonzini@redhat.com>,
kvm@vger.kernel.org
Cc: Sean Christopherson <sean.j.christopherson@intel.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Wanpeng Li <wanpengli@tencent.com>,
Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
Kan Liang <kan.liang@linux.intel.com>,
luwei.kang@intel.com, Thomas Gleixner <tglx@linutronix.de>,
wei.w.wang@intel.com, Tony Luck <tony.luck@intel.com>,
Stephane Eranian <eranian@google.com>,
Mark Gross <mgross@linux.intel.com>,
Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH RFC v2 14/17] KVM: vmx/pmu: Limit pebs_interrupt_threshold in the guest DS area
Date: Mon, 9 Nov 2020 10:12:51 +0800 [thread overview]
Message-ID: <20201109021254.79755-15-like.xu@linux.intel.com> (raw)
In-Reply-To: <20201109021254.79755-1-like.xu@linux.intel.com>
If the host counter X is scheduled to the guest PEBS counter Y,
the guest ds pebs_interrupt_threshold field in guest DS area would
be changed to only ONE record before vm-entry which helps KVM
more easily and accurately handle the cross-mapping emulation
when the PEBS overflow PMI is generated.
In most cases, the guest counters would not be scheduled in a cross-mapped
way which means there is no need to change guest DS
pebs_interrupt_threshold and the applicable_counters fields in the guest
PEBS records are naturally correct. PEBS facility writes multiple PEBS
records into guest DS w/o interception and the performance is good.
AFAIK, we don't expect that changing the pebs_interrupt_threshold value
from the KVM side will break any guest PEBS drivers.
Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/pmu.c | 17 +++-----
arch/x86/kvm/pmu.h | 11 +++++
arch/x86/kvm/vmx/pmu_intel.c | 71 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
5 files changed, 90 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bffb384485da..77b529b8c16a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -456,6 +456,7 @@ struct kvm_pmu {
u64 pebs_data_cfg_mask;
bool counter_cross_mapped;
+ bool need_rewrite_ds_pebs_interrupt_threshold;
/*
* The gate to release perf_events not marked in
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index f87be3c2140e..7c8e3ca5b7ad 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -471,17 +471,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
}
-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (pmc_is_fixed(pmc))
- return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
- return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
@@ -576,4 +565,10 @@ void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu)
break;
}
}
+
+ if (!pmu->counter_cross_mapped)
+ return;
+
+ if (pmu->need_rewrite_ds_pebs_interrupt_threshold)
+ kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index b1e52e33f08c..6cdc9fd03195 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -147,6 +147,17 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
return sample_period;
}
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
+ if (pmc_is_fixed(pmc))
+ return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+
+ return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+}
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 2917105e584e..346b1104e674 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -211,6 +211,23 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
return pmc;
}
+static void intel_pmu_pebs_setup(struct kvm_pmu *pmu)
+{
+ struct kvm_pmc *pmc = NULL;
+ int bit;
+
+ pmu->need_rewrite_ds_pebs_interrupt_threshold = false;
+
+ for_each_set_bit(bit, (unsigned long *)&pmu->pebs_enable, X86_PMC_IDX_MAX) {
+ pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
+
+ if (pmc && pmc_speculative_in_use(pmc)) {
+ pmu->need_rewrite_ds_pebs_interrupt_threshold = true;
+ break;
+ }
+ }
+}
+
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -287,6 +304,8 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
if (kvm_valid_perf_global_ctrl(pmu, data)) {
global_ctrl_changed(pmu, data);
+ if (pmu->global_ctrl & pmu->pebs_enable)
+ intel_pmu_pebs_setup(pmu);
return 0;
}
break;
@@ -491,12 +510,64 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmu->global_ovf_ctrl = 0;
}
+static int rewrite_ds_pebs_interrupt_threshold(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct debug_store *ds = NULL;
+ u64 new_threshold, offset;
+ gpa_t gpa;
+ int srcu_idx, ret = -ENOMEM;
+
+ ds = kmalloc(sizeof(struct debug_store), GFP_KERNEL);
+ if (!ds)
+ goto out;
+
+ ret = -EFAULT;
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ gpa = kvm_mmu_gva_to_gpa_system(vcpu, pmu->ds_area, NULL);
+ if (gpa == UNMAPPED_GVA)
+ goto unlock_out;
+
+ if (kvm_read_guest(vcpu->kvm, gpa, ds, sizeof(struct debug_store)))
+ goto unlock_out;
+
+ /* Adding sizeof(struct pebs_basic) offset is enough to generate PMI. */
+ new_threshold = ds->pebs_buffer_base + sizeof(struct pebs_basic);
+ offset = offsetof(struct debug_store, pebs_interrupt_threshold);
+ gpa = kvm_mmu_gva_to_gpa_system(vcpu, pmu->ds_area + offset, NULL);
+ if (gpa == UNMAPPED_GVA)
+ goto unlock_out;
+
+ if (kvm_write_guest(vcpu->kvm, gpa, &new_threshold, sizeof(u64)))
+ goto unlock_out;
+
+ ret = 0;
+
+unlock_out:
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+out:
+ kfree(ds);
+ return ret;
+}
+
void intel_pmu_handle_event(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ int ret;
if (!(pmu->global_ctrl & pmu->pebs_enable))
return;
+
+ if (pmu->counter_cross_mapped && pmu->need_rewrite_ds_pebs_interrupt_threshold) {
+ ret = rewrite_ds_pebs_interrupt_threshold(vcpu);
+ pmu->need_rewrite_ds_pebs_interrupt_threshold = false;
+ }
+
+ if (ret == -ENOMEM)
+ pr_debug_ratelimited("%s: Fail to emulate guest PEBS due to OOM.", __func__);
+ else if (ret == -EFAULT)
+ pr_debug_ratelimited("%s: Fail to emulate guest PEBS due to GPA fault.", __func__);
}
struct kvm_pmu_ops intel_pmu_ops = {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88a544e6379f..8db0811c1dd3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5856,6 +5856,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
{
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
}
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_system);
static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u32 access,
--
2.21.3
next prev parent reply other threads:[~2020-11-09 2:17 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-09 2:12 [PATCH RFC v2 00/17] KVM: x86/pmu: Add support to enable Guest PEBS via DS Like Xu
2020-11-09 2:12 ` [PATCH v2 01/17] KVM: x86/pmu: Set MSR_IA32_MISC_ENABLE_EMON bit when vPMU is enabled Like Xu
2020-11-09 2:12 ` [PATCH v2 02/17] KVM: vmx/pmu: Use IA32_PERF_CAPABILITIES to adjust features visibility Like Xu
2020-11-09 2:12 ` [PATCH v2 03/17] KVM: x86/pmu: Introduce the ctrl_mask value for fixed counter Like Xu
2020-11-09 2:12 ` [PATCH v2 04/17] perf: x86/ds: Handle guest PEBS overflow PMI and inject it to guest Like Xu
2020-11-17 14:35 ` Peter Zijlstra
2020-11-18 16:15 ` Like Xu
2020-11-18 18:07 ` Peter Zijlstra
2020-11-19 1:36 ` Xu, Like
2020-11-27 2:14 ` Xu, Like
2020-11-30 10:49 ` Peter Zijlstra
2020-12-01 1:25 ` Xu, Like
2020-11-09 2:12 ` [PATCH v2 05/17] KVM: x86/pmu: Reprogram guest PEBS event to emulate guest PEBS counter Like Xu
2020-11-17 14:41 ` Peter Zijlstra
2020-11-18 16:18 ` Like Xu
2020-11-09 2:12 ` [PATCH v2 06/17] KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS Like Xu
2020-11-09 2:12 ` [PATCH v2 07/17] KVM: x86/pmu: Add IA32_DS_AREA MSR emulation to manage guest DS buffer Like Xu
2020-11-09 2:12 ` [PATCH v2 08/17] KVM: x86/pmu: Add PEBS_DATA_CFG MSR emulation to support adaptive PEBS Like Xu
2020-11-09 2:12 ` [PATCH v2 09/17] KVM: x86: Set PEBS_UNAVAIL in IA32_MISC_ENABLE when PEBS is enabled Like Xu
2020-11-09 2:12 ` [PATCH v2 10/17] KVM: x86/pmu: Expose CPUIDs feature bits PDCM, DS, DTES64 Like Xu
2020-11-09 2:12 ` [PATCH v2 11/17] KVM: x86/pmu: Adjust precise_ip to emulate Ice Lake guest PDIR counter Like Xu
2020-11-09 2:12 ` [PATCH v2 12/17] KVM: x86/pmu: Disable guest PEBS when counters are cross-mapped Like Xu
2020-11-09 2:12 ` [PATCH RFC v2 13/17] KVM: x86/pmu: Add hook to emulate pebs for cross-mapped counters Like Xu
2020-11-09 2:12 ` Like Xu [this message]
2020-11-09 2:12 ` [PATCH RFC v2 15/17] KVM: vmx/pmu: Rewrite applicable_counters field in the guest PEBS record Like Xu
2020-11-09 2:12 ` [PATCH RFC v2 16/17] KVM: x86/pmu: Save guest pebs reset value when a pebs counter is configured Like Xu
2020-11-09 2:12 ` [PATCH RFC v2 17/17] KVM: x86/pmu: Adjust guest DS pebs reset counter values for mapped counter Like Xu
2020-11-10 15:12 ` [PATCH RFC v2 00/17] KVM: x86/pmu: Add support to enable Guest PEBS via DS Peter Zijlstra
2020-11-10 15:37 ` [PATCH] perf/intel: Remove Perfmon-v4 counter_freezing support Peter Zijlstra
2020-11-10 20:52 ` Stephane Eranian
2020-11-11 2:42 ` Xu, Like
2021-01-26 9:51 ` Paolo Bonzini
2021-01-26 10:36 ` Peter Zijlstra
2021-01-26 11:35 ` Xu, Like
2021-01-26 11:59 ` Paolo Bonzini
2020-11-11 8:38 ` Peter Zijlstra
2020-11-16 3:22 ` Like Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201109021254.79755-15-like.xu@linux.intel.com \
--to=like.xu@linux.intel.com \
--cc=eranian@google.com \
--cc=jmattson@google.com \
--cc=joro@8bytes.org \
--cc=kan.liang@linux.intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luwei.kang@intel.com \
--cc=mgross@linux.intel.com \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=sean.j.christopherson@intel.com \
--cc=srinivas.pandruvada@linux.intel.com \
--cc=tglx@linutronix.de \
--cc=tony.luck@intel.com \
--cc=vkuznets@redhat.com \
--cc=wanpengli@tencent.com \
--cc=wei.w.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).