linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Like Xu <like.xu@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	kvm@vger.kernel.org
Cc: Sean Christopherson <sean.j.christopherson@intel.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Kan Liang <kan.liang@linux.intel.com>,
	luwei.kang@intel.com, Thomas Gleixner <tglx@linutronix.de>,
	wei.w.wang@intel.com, Tony Luck <tony.luck@intel.com>,
	Stephane Eranian <eranian@google.com>,
	Mark Gross <mgross@linux.intel.com>,
	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH RFC v2 17/17] KVM: x86/pmu: Adjust guest DS pebs reset counter values for mapped counter
Date: Mon,  9 Nov 2020 10:12:54 +0800	[thread overview]
Message-ID: <20201109021254.79755-18-like.xu@linux.intel.com> (raw)
In-Reply-To: <20201109021254.79755-1-like.xu@linux.intel.com>

The original PEBS reset counter value has been saved to pmc->reset_counter.

When the guest PEBS counter X is enabled, the reset value RST-x would be
written to guest DS reset field RST-y and it will be auto reloaded to the
real host counter Y which is mapped to the guest PEBS counter X during
this vm-entry period.

KVM would record each last host reset counter index field for each guest
PEBS counter and trigger the reset values rewrite once any entry in the
host-guest counter mapping table is changed before vm-entry.

The frequent changes in the mapping relationship should only happen when
perf multiplexes the counters with the default 1ms timer. The time cost
of adjusting the guest reset values will not exceed 1ms (13347ns on ICX),
and there will be no race with the multiplex timer to create a livelock.

Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/pmu.c              | 15 +++++++++++
 arch/x86/kvm/pmu.h              |  1 +
 arch/x86/kvm/vmx/pmu_intel.c    | 47 ++++++++++++++++++++++++++++++---
 4 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 32a677ff1e55..93026e9361d9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -418,6 +418,7 @@ struct kvm_pmc {
 	enum pmc_type type;
 	u8 idx;
 	u64 counter;
+	u8 host_idx;
 	u64 reset_counter;
 	u64 eventsel;
 	struct perf_event *perf_event;
@@ -460,6 +461,7 @@ struct kvm_pmu {
 	bool need_rewrite_ds_pebs_interrupt_threshold;
 	bool need_rewrite_pebs_records;
 	bool need_save_reset_counter;
+	bool need_rewrite_reset_counter;
 
 	/*
 	 * The gate to release perf_events not marked in
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 64dce19644e3..d12dbe07117e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -154,6 +154,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	if (IS_ERR(event)) {
 		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
 			    PTR_ERR(event), pmc->idx);
+		pmc->host_idx = -1;
 		return;
 	}
 
@@ -554,6 +555,7 @@ void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu)
 	int bit;
 
 	pmu->counter_cross_mapped = false;
+	pmu->need_rewrite_reset_counter = false;
 
 	for_each_set_bit(bit, (unsigned long *)&pmu->pebs_enable, X86_PMC_IDX_MAX) {
 		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
@@ -567,6 +569,19 @@ void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	for_each_set_bit(bit, (unsigned long *)&pmu->pebs_enable, X86_PMC_IDX_MAX) {
+		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
+
+		if (!pmc || !pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
+			continue;
+
+		if ((pmc->perf_event && (pmc->host_idx != pmc->perf_event->hw.idx))) {
+			pmu->need_rewrite_reset_counter = true;
+			kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+			break;
+		}
+	}
+
 	if (!pmu->counter_cross_mapped)
 		return;
 
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 6cdc9fd03195..2776a048fd27 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -74,6 +74,7 @@ static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
 		pmc->perf_event = NULL;
 		pmc->current_config = 0;
 		pmc_to_pmu(pmc)->event_count--;
+		pmc->host_idx = -1;
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f5a69addd7a8..0aab3a4f9e41 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -659,10 +659,46 @@ static int save_ds_pebs_reset_values(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static int rewrite_ds_pebs_reset_counters(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc = NULL;
+	gpa_t gpa;
+	int srcu_idx, bit, ret;
+	u64 offset, host_idx, idx;
+
+	ret = -EFAULT;
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	for_each_set_bit(bit, (unsigned long *)&pmu->pebs_enable, X86_PMC_IDX_MAX) {
+		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
+
+		if (!pmc || !pmc->perf_event)
+			continue;
+
+		host_idx = pmc->perf_event->hw.idx;
+		idx = (host_idx < INTEL_PMC_IDX_FIXED) ?
+				host_idx : (MAX_PEBS_EVENTS + host_idx - INTEL_PMC_IDX_FIXED);
+		offset = offsetof(struct debug_store, pebs_event_reset) + sizeof(u64) * idx;
+		gpa = kvm_mmu_gva_to_gpa_system(vcpu, pmu->ds_area + offset, NULL);
+		if (gpa == UNMAPPED_GVA)
+			goto out;
+
+		if (kvm_write_guest(vcpu->kvm, gpa, &pmc->reset_counter, sizeof(u64)))
+			goto out;
+
+		pmc->host_idx = pmc->perf_event->hw.idx;
+	}
+	ret = 0;
+
+out:
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+	return ret;
+}
+
 void intel_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	int ret1, ret2, ret3;
+	int ret1, ret2, ret3, ret4;
 
 	if (pmu->need_rewrite_pebs_records) {
 		pmu->need_rewrite_pebs_records = false;
@@ -682,11 +718,16 @@ void intel_pmu_handle_event(struct kvm_vcpu *vcpu)
 		ret3 = save_ds_pebs_reset_values(vcpu);
 	}
 
+	if (pmu->need_rewrite_reset_counter) {
+		ret4 = pmu->need_rewrite_reset_counter = false;
+		rewrite_ds_pebs_reset_counters(vcpu);
+	}
+
 out:
 
-	if (ret1 == -ENOMEM || ret2 == -ENOMEM || ret3 == -ENOMEM)
+	if (ret1 == -ENOMEM || ret2 == -ENOMEM || ret3 == -ENOMEM || ret4 == -ENOMEM)
 		pr_debug_ratelimited("%s: Fail to emulate guest PEBS due to OOM.", __func__);
-	else if (ret1 == -EFAULT || ret2 == -EFAULT || ret3 == -EFAULT)
+	else if (ret1 == -EFAULT || ret2 == -EFAULT || ret3 == -EFAULT || ret4 == -EFAULT)
 		pr_debug_ratelimited("%s: Fail to emulate guest PEBS due to GPA fault.", __func__);
 }
 
-- 
2.21.3


  parent reply	other threads:[~2020-11-09  2:18 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-09  2:12 [PATCH RFC v2 00/17] KVM: x86/pmu: Add support to enable Guest PEBS via DS Like Xu
2020-11-09  2:12 ` [PATCH v2 01/17] KVM: x86/pmu: Set MSR_IA32_MISC_ENABLE_EMON bit when vPMU is enabled Like Xu
2020-11-09  2:12 ` [PATCH v2 02/17] KVM: vmx/pmu: Use IA32_PERF_CAPABILITIES to adjust features visibility Like Xu
2020-11-09  2:12 ` [PATCH v2 03/17] KVM: x86/pmu: Introduce the ctrl_mask value for fixed counter Like Xu
2020-11-09  2:12 ` [PATCH v2 04/17] perf: x86/ds: Handle guest PEBS overflow PMI and inject it to guest Like Xu
2020-11-17 14:35   ` Peter Zijlstra
2020-11-18 16:15     ` Like Xu
2020-11-18 18:07       ` Peter Zijlstra
2020-11-19  1:36         ` Xu, Like
2020-11-27  2:14         ` Xu, Like
2020-11-30 10:49           ` Peter Zijlstra
2020-12-01  1:25             ` Xu, Like
2020-11-09  2:12 ` [PATCH v2 05/17] KVM: x86/pmu: Reprogram guest PEBS event to emulate guest PEBS counter Like Xu
2020-11-17 14:41   ` Peter Zijlstra
2020-11-18 16:18     ` Like Xu
2020-11-09  2:12 ` [PATCH v2 06/17] KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS Like Xu
2020-11-09  2:12 ` [PATCH v2 07/17] KVM: x86/pmu: Add IA32_DS_AREA MSR emulation to manage guest DS buffer Like Xu
2020-11-09  2:12 ` [PATCH v2 08/17] KVM: x86/pmu: Add PEBS_DATA_CFG MSR emulation to support adaptive PEBS Like Xu
2020-11-09  2:12 ` [PATCH v2 09/17] KVM: x86: Set PEBS_UNAVAIL in IA32_MISC_ENABLE when PEBS is enabled Like Xu
2020-11-09  2:12 ` [PATCH v2 10/17] KVM: x86/pmu: Expose CPUIDs feature bits PDCM, DS, DTES64 Like Xu
2020-11-09  2:12 ` [PATCH v2 11/17] KVM: x86/pmu: Adjust precise_ip to emulate Ice Lake guest PDIR counter Like Xu
2020-11-09  2:12 ` [PATCH v2 12/17] KVM: x86/pmu: Disable guest PEBS when counters are cross-mapped Like Xu
2020-11-09  2:12 ` [PATCH RFC v2 13/17] KVM: x86/pmu: Add hook to emulate pebs for cross-mapped counters Like Xu
2020-11-09  2:12 ` [PATCH RFC v2 14/17] KVM: vmx/pmu: Limit pebs_interrupt_threshold in the guest DS area Like Xu
2020-11-09  2:12 ` [PATCH RFC v2 15/17] KVM: vmx/pmu: Rewrite applicable_counters field in the guest PEBS record Like Xu
2020-11-09  2:12 ` [PATCH RFC v2 16/17] KVM: x86/pmu: Save guest pebs reset value when a pebs counter is configured Like Xu
2020-11-09  2:12 ` Like Xu [this message]
2020-11-10 15:12 ` [PATCH RFC v2 00/17] KVM: x86/pmu: Add support to enable Guest PEBS via DS Peter Zijlstra
2020-11-10 15:37   ` [PATCH] perf/intel: Remove Perfmon-v4 counter_freezing support Peter Zijlstra
2020-11-10 20:52     ` Stephane Eranian
2020-11-11  2:42       ` Xu, Like
2021-01-26  9:51         ` Paolo Bonzini
2021-01-26 10:36           ` Peter Zijlstra
2021-01-26 11:35           ` Xu, Like
2021-01-26 11:59             ` Paolo Bonzini
2020-11-11  8:38       ` Peter Zijlstra
2020-11-16  3:22     ` Like Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201109021254.79755-18-like.xu@linux.intel.com \
    --to=like.xu@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kan.liang@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luwei.kang@intel.com \
    --cc=mgross@linux.intel.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=srinivas.pandruvada@linux.intel.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=wei.w.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).