From: Wei Wang <wei.w.wang@intel.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	pbonzini@redhat.com, ak@linux.intel.com, peterz@infradead.org
Cc: mingo@redhat.com, rkrcmar@redhat.com, like.xu@intel.com,
	wei.w.wang@intel.com
Subject: [PATCH v1 3/8] KVM/x86/vPMU: optimize intel vPMU
Date: Thu,  1 Nov 2018 18:04:03 +0800	[thread overview]
Message-ID: <1541066648-40690-4-git-send-email-wei.w.wang@intel.com> (raw)
In-Reply-To: <1541066648-40690-1-git-send-email-wei.w.wang@intel.com>

The current vPMU relies on the host perf software stack to apply guest changes
to the perf counter MSRs. Each update releases the old perf event and creates
a new one, which adds around 2500000ns of overhead per write to a perf counter
control MSR.

This overhead can be avoided by having the vPMU layer sit directly on top of
the hardware perf counters; guest accesses to the virtual perf counters are
then applied by the vPMU straight to the corresponding hardware counters. The
counters used by the guest are taken from the host perf core via
x86_perf_mask_perf_counters, which in most cases amounts to setting the bits
of the guest mask.
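
As an illustration (assuming the usual x86 perf counter index layout, where
general-purpose counters occupy the low bits and fixed counters start at bit
INTEL_PMC_IDX_FIXED, and assuming x86_perf_mask_perf_counters from patch 1/8
takes the u64 bitmap of guest-owned counters), a guest that currently uses GP
counter 0 and fixed counter 1 would hand the following mask to the host perf
core:

	u64 mask;

	mask = BIT_ULL(0) | BIT_ULL(INTEL_PMC_IDX_FIXED + 1);
	/* counters set in the mask are removed from host scheduling */
	x86_perf_mask_perf_counters(mask);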

This patch implements the handling of guest accesses to the perf counter
MSRs. A host perf counter is assigned to the guest when the guest enables the
corresponding vPMC, and is returned to the host when the vPMC is disabled.
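
As a rough sketch of the resulting write path for a general-purpose event
select MSR (simplified from intel_pmu_set_gp_eventsel() in the diff below;
validity checks omitted), the guest value is now programmed straight into the
hardware MSR instead of going through a perf event release/create cycle:

	pmc->eventsel = msr_info->data;
	if (pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) {
		/* own the hardware counter first, then program it */
		intel_pmu_get_pmc(pmu, pmc->idx);
		wrmsrl(msr_info->index, pmc->eventsel);
	} else {
		/* stop counting first, then return the counter to the host */
		wrmsrl(msr_info->index, pmc->eventsel);
		intel_pmu_put_pmc(pmu, pmc->idx);
	}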

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/include/asm/kvm_host.h |   1 +
 arch/x86/kvm/pmu_intel.c        | 257 ++++++++++++++++++++++++++++++++--------
 2 files changed, 209 insertions(+), 49 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55e51ff..f8bc46d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -463,6 +463,7 @@ struct kvm_pmu {
 	u64 global_ovf_ctrl;
 	u64 counter_bitmask[2];
 	u64 global_ctrl_mask;
+	u64 assigned_pmc_bitmap;
 	u64 reserved_bits;
 	u8 version;
 	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 5ab4a36..8c2d37f 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -185,7 +185,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_fixed_pmc(pmu, msr))) {
-			*data = pmc_read_counter(pmc);
+			if (test_bit(pmc->idx,
+				(unsigned long *)&pmu->assigned_pmc_bitmap))
+				rdmsrl(msr, *data);
+			else
+				*data = pmc->counter;
 			return 0;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			*data = pmc->eventsel;
@@ -196,59 +200,210 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	return 1;
 }
 
+static void intel_pmu_update_fixed_ctrl_msr(u64 new_ctrl, u8 idx)
+{
+	u64 host_ctrl, mask;
+
+	rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, host_ctrl);
+	mask = 0xfULL << (idx * 4);
+	host_ctrl &= ~mask;
+	new_ctrl <<= (idx * 4);
+	host_ctrl |= new_ctrl;
+	wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, host_ctrl);
+}
+
+static void intel_pmu_save_pmc_counters(struct kvm_pmu *pmu, u32 idx)
+{
+	struct kvm_pmc *pmc;
+
+	pmc = intel_pmc_idx_to_pmc(pmu, idx);
+	/*
+	 * The control MSRs (pmc->eventsel and pmu->fixed_ctr_ctrl) always
+	 * store the updated value, so we only need to save the counter value.
+	 */
+	if (pmc->type == KVM_PMC_GP)
+		rdmsrl(MSR_IA32_PERFCTR0 + idx, pmc->counter);
+	else
+		rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + idx - INTEL_PMC_IDX_FIXED,
+		       pmc->counter);
+}
+
+static void intel_pmu_restore_pmc_counters(struct kvm_pmu *pmu, u32 idx)
+{
+	struct kvm_pmc *pmc;
+
+	pmc = intel_pmc_idx_to_pmc(pmu, idx);
+
+	if (pmc->type == KVM_PMC_GP) {
+		wrmsrl(MSR_IA32_PERFCTR0 + idx, pmc->counter);
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc->eventsel);
+	} else {
+		u8 ctrl;
+
+		idx -= INTEL_PMC_IDX_FIXED;
+		ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
+
+		wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + idx, pmc->counter);
+		intel_pmu_update_fixed_ctrl_msr(ctrl, idx);
+	}
+}
+
+/* Get the physical PMC from host and restore the vPMC states. */
+static inline void intel_pmu_get_pmc(struct kvm_pmu *pmu, unsigned int idx)
+{
+	/* Already assigned? */
+	if (test_bit(idx, (unsigned long *)&pmu->assigned_pmc_bitmap))
+		return;
+
+	set_bit(idx, (unsigned long *)&pmu->assigned_pmc_bitmap);
+	x86_perf_mask_perf_counters(pmu->assigned_pmc_bitmap);
+	intel_pmu_restore_pmc_counters(pmu, idx);
+}
+
+/* Save the physical PMC state and return it to host. */
+static inline void intel_pmu_put_pmc(struct kvm_pmu *pmu, unsigned int idx)
+{
+	/* Already returned? */
+	if (!test_bit(idx, (unsigned long *)&pmu->assigned_pmc_bitmap))
+		return;
+
+	intel_pmu_save_pmc_counters(pmu, idx);
+	clear_bit(idx, (unsigned long *)&pmu->assigned_pmc_bitmap);
+	x86_perf_mask_perf_counters(pmu->assigned_pmc_bitmap);
+}
+
+static int intel_pmu_set_fixed_ctrl(struct kvm_pmu *pmu,
+				    struct msr_data *msr_info)
+{
+	u8 old_ctrl, new_ctrl, pmc_idx, i;
+	u64 data = msr_info->data;
+
+	if (pmu->fixed_ctr_ctrl == data)
+		return 0;
+	if (unlikely(data & 0xfffffffffffff444ull))
+		return 1;
+
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+		old_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, i);
+		new_ctrl = fixed_ctrl_field(data, i);
+		if (old_ctrl == new_ctrl)
+			continue;
+
+		pmc_idx = INTEL_PMC_IDX_FIXED + i;
+		if (new_ctrl) {
+			/* Set the control after we own the pmc */
+			intel_pmu_get_pmc(pmu, pmc_idx);
+			intel_pmu_update_fixed_ctrl_msr((u64)new_ctrl, i);
+		} else {
+			/* Zero the control before we return the pmc */
+			intel_pmu_update_fixed_ctrl_msr((u64)new_ctrl, i);
+			intel_pmu_put_pmc(pmu, pmc_idx);
+		}
+	}
+	pmu->fixed_ctr_ctrl = data;
+
+	return 0;
+}
+
+static int intel_pmu_set_global_status(struct kvm_pmu *pmu,
+				       struct msr_data *msr_info)
+{
+	/* RO to the guest */
+	if (!msr_info->host_initiated)
+		return 1;
+
+	pmu->global_status = msr_info->data;
+	return 0;
+}
+
+static int intel_pmu_set_global_ctrl(struct kvm_pmu *pmu,
+				     struct msr_data *msr_info)
+{
+	u64 data = msr_info->data;
+
+	if (unlikely(data & pmu->global_ctrl_mask))
+		return 1;
+
+	pmu->global_ctrl = data;
+
+	return 0;
+}
+
+static int intel_pmu_set_ovf_ctrl(struct kvm_pmu *pmu,
+				  struct msr_data *msr_info)
+{
+	u64 data = msr_info->data;
+
+	if (unlikely(data & (pmu->global_ctrl_mask & ~(3ull<<62))))
+		return 1;
+
+	if (!msr_info->host_initiated)
+		pmu->global_status &= ~data;
+
+	pmu->global_ovf_ctrl = data;
+
+	return 0;
+}
+
+static int intel_pmu_set_gp_eventsel(struct kvm_pmc *pmc,
+				     struct msr_data *msr_info)
+{
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+	u64 data = msr_info->data;
+
+	if (pmc->eventsel == data)
+		return 0;
+
+	pmc->eventsel = data;
+
+	if (data & ARCH_PERFMON_EVENTSEL_ENABLE) {
+		intel_pmu_get_pmc(pmu, pmc->idx);
+		wrmsrl(msr_info->index, pmc->eventsel);
+	} else {
+		wrmsrl(msr_info->index, pmc->eventsel);
+		intel_pmu_put_pmc(pmu, pmc->idx);
+	}
+
+	return 0;
+}
+
+static int intel_pmu_set_pmc_counter(struct kvm_pmc *pmc,
+				     struct msr_data *msr_info)
+{
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
+	pmc->counter = msr_info->data;
+	if (test_bit(pmc->idx, (unsigned long *)&pmu->assigned_pmc_bitmap))
+		wrmsrl(msr_info->index, pmc->counter);
+
+	return 0;
+}
+
 static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
 	u32 msr = msr_info->index;
-	u64 data = msr_info->data;
 
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
-		if (pmu->fixed_ctr_ctrl == data)
-			return 0;
-		if (!(data & 0xfffffffffffff444ull)) {
-			reprogram_fixed_counters(pmu, data);
-			return 0;
-		}
-		break;
+		return intel_pmu_set_fixed_ctrl(pmu, msr_info);
 	case MSR_CORE_PERF_GLOBAL_STATUS:
-		if (msr_info->host_initiated) {
-			pmu->global_status = data;
-			return 0;
-		}
-		break; /* RO MSR */
+		return intel_pmu_set_global_status(pmu, msr_info);
 	case MSR_CORE_PERF_GLOBAL_CTRL:
-		if (pmu->global_ctrl == data)
-			return 0;
-		if (!(data & pmu->global_ctrl_mask)) {
-			global_ctrl_changed(pmu, data);
-			return 0;
-		}
-		break;
+		return intel_pmu_set_global_ctrl(pmu, msr_info);
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-		if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
-			if (!msr_info->host_initiated)
-				pmu->global_status &= ~data;
-			pmu->global_ovf_ctrl = data;
-			return 0;
-		}
-		break;
+		return intel_pmu_set_ovf_ctrl(pmu, msr_info);
 	default:
-		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
-		    (pmc = get_fixed_pmc(pmu, msr))) {
-			if (!msr_info->host_initiated)
-				data = (s64)(s32)data;
-			pmc->counter += data - pmc_read_counter(pmc);
-			return 0;
-		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
-			if (data == pmc->eventsel)
-				return 0;
-			if (!(data & pmu->reserved_bits)) {
-				reprogram_gp_counter(pmc, data);
-				return 0;
-			}
-		}
+		pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
+		if (pmc)
+			return intel_pmu_set_gp_eventsel(pmc, msr_info);
+		pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);
+		if (pmc)
+			return intel_pmu_set_pmc_counter(pmc, msr_info);
+		pmc = get_fixed_pmc(pmu, msr);
+		if (pmc)
+			return intel_pmu_set_pmc_counter(pmc, msr_info);
 	}
 
 	return 1;
@@ -326,20 +481,24 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
 	int i;
 
 	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
-		struct kvm_pmc *pmc = &pmu->gp_counters[i];
-
-		pmc_stop_counter(pmc);
+		pmc = &pmu->gp_counters[i];
 		pmc->counter = pmc->eventsel = 0;
 	}
 
-	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
-		pmc_stop_counter(&pmu->fixed_counters[i]);
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+		pmc = &pmu->fixed_counters[i];
+		pmc->counter = 0;
+	}
 
-	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
-		pmu->global_ovf_ctrl = 0;
+	pmu->fixed_ctr_ctrl = 0;
+	pmu->global_ctrl = 0;
+	pmu->global_status = 0;
+	pmu->global_ovf_ctrl = 0;
+	pmu->assigned_pmc_bitmap = 0;
 }
 
 struct kvm_pmu_ops intel_pmu_ops = {
-- 
2.7.4



Thread overview: 17+ messages
2018-11-01 10:04 [PATCH v1 0/8] Intel Virtual PMU Optimization Wei Wang
2018-11-01 10:04 ` [PATCH v1 1/8] perf/x86: add support to mask counters from host Wei Wang
2018-11-01 14:52   ` Peter Zijlstra
2018-11-02  9:08     ` Wei Wang
2018-11-05  9:34       ` Peter Zijlstra
2018-11-05 11:19         ` Wei Wang
2018-11-05 12:14           ` Peter Zijlstra
2018-11-05 15:37             ` Wang, Wei W
2018-11-05 16:56               ` Peter Zijlstra
2018-11-05 18:20               ` Andi Kleen
2018-11-01 10:04 ` [PATCH v1 2/8] perf/x86/intel: add pmi callback support Wei Wang
2018-11-01 10:04 ` Wei Wang [this message]
2018-11-01 10:04 ` [PATCH v1 4/8] KVM/x86/vPMU: support msr switch on vmx transitions Wei Wang
2018-11-01 10:04 ` [PATCH v1 5/8] KVM/x86/vPMU: intel_pmu_read_pmc Wei Wang
2018-11-01 10:04 ` [PATCH v1 6/8] KVM/x86/vPMU: remove some unused functions Wei Wang
2018-11-01 10:04 ` [PATCH v1 7/8] KVM/x86/vPMU: save/restore guest perf counters on vCPU switching Wei Wang
2018-11-01 10:04 ` [PATCH v1 8/8] KVM/x86/vPMU: return the counters to host if guest is torn down Wei Wang
