From mboxrd@z Thu Jan 1 00:00:00 1970 From: Boris Ostrovsky Subject: [PATCH v19 14/14] x86/VPMU: Move VPMU files up from hvm/ directory Date: Tue, 17 Mar 2015 10:54:11 -0400 Message-ID: <1426604051-2980-15-git-send-email-boris.ostrovsky@oracle.com> References: <1426604051-2980-1-git-send-email-boris.ostrovsky@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1426604051-2980-1-git-send-email-boris.ostrovsky@oracle.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: JBeulich@suse.com, kevin.tian@intel.com, suravee.suthikulpanit@amd.com, Aravind.Gopalakrishnan@amd.com, dietmar.hahn@ts.fujitsu.com, dgdegra@tycho.nsa.gov, andrew.cooper3@citrix.com Cc: boris.ostrovsky@oracle.com, tim@xen.org, jun.nakajima@intel.com, xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org Since the PMU is no longer HVM-specific, we can move the VPMU-related files up from the arch/x86/hvm/ directory. 
Specifically: arch/x86/hvm/vpmu.c -> arch/x86/cpu/vpmu.c arch/x86/hvm/svm/vpmu.c -> arch/x86/cpu/vpmu_amd.c arch/x86/hvm/vmx/vpmu_core2.c -> arch/x86/cpu/vpmu_intel.c include/asm-x86/hvm/vpmu.h -> include/asm-x86/vpmu.h Signed-off-by: Boris Ostrovsky Acked-by: Jan Beulich Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Dietmar Hahn Tested-by: Dietmar Hahn --- xen/arch/x86/cpu/Makefile | 1 + xen/arch/x86/cpu/vpmu.c | 791 ++++++++++++++++++++++++++++ xen/arch/x86/cpu/vpmu_amd.c | 498 ++++++++++++++++++ xen/arch/x86/cpu/vpmu_intel.c | 939 ++++++++++++++++++++++++++++++++++ xen/arch/x86/hvm/Makefile | 1 - xen/arch/x86/hvm/svm/Makefile | 1 - xen/arch/x86/hvm/svm/vpmu.c | 498 ------------------ xen/arch/x86/hvm/vlapic.c | 2 +- xen/arch/x86/hvm/vmx/Makefile | 1 - xen/arch/x86/hvm/vmx/vpmu_core2.c | 939 ---------------------------------- xen/arch/x86/hvm/vpmu.c | 791 ---------------------------- xen/arch/x86/oprofile/op_model_ppro.c | 2 +- xen/arch/x86/traps.c | 2 +- xen/include/asm-x86/hvm/vmx/vmcs.h | 2 +- xen/include/asm-x86/hvm/vpmu.h | 143 ------ xen/include/asm-x86/vpmu.h | 143 ++++++ 16 files changed, 2376 insertions(+), 2378 deletions(-) create mode 100644 xen/arch/x86/cpu/vpmu.c create mode 100644 xen/arch/x86/cpu/vpmu_amd.c create mode 100644 xen/arch/x86/cpu/vpmu_intel.c delete mode 100644 xen/arch/x86/hvm/svm/vpmu.c delete mode 100644 xen/arch/x86/hvm/vmx/vpmu_core2.c delete mode 100644 xen/arch/x86/hvm/vpmu.c delete mode 100644 xen/include/asm-x86/hvm/vpmu.h create mode 100644 xen/include/asm-x86/vpmu.h diff --git a/xen/arch/x86/cpu/Makefile b/xen/arch/x86/cpu/Makefile index d73d93a..74f23ae 100644 --- a/xen/arch/x86/cpu/Makefile +++ b/xen/arch/x86/cpu/Makefile @@ -7,3 +7,4 @@ obj-y += common.o obj-y += intel.o obj-y += intel_cacheinfo.o obj-y += mwait-idle.o +obj-y += vpmu.o vpmu_amd.o vpmu_intel.o diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c new file mode 100644 index 0000000..2412e7b --- /dev/null +++ b/xen/arch/x86/cpu/vpmu.c @@ -0,0 
+1,791 @@ +/* + * vpmu.c: PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Haitao Shan + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +CHECK_pmu_params; +CHECK_pmu_intel_ctxt; +CHECK_pmu_amd_ctxt; +CHECK_pmu_cntr_pair; +CHECK_pmu_regs; + +/* + * "vpmu" : vpmu generally enabled + * "vpmu=off" : vpmu generally disabled + * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on. 
+ */ +static unsigned int __read_mostly opt_vpmu_enabled; +unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF; +unsigned int __read_mostly vpmu_features = 0; +static void parse_vpmu_param(char *s); +custom_param("vpmu", parse_vpmu_param); + +static DEFINE_SPINLOCK(vpmu_lock); +static unsigned vpmu_count; + +static DEFINE_PER_CPU(struct vcpu *, last_vcpu); + +static void __init parse_vpmu_param(char *s) +{ + switch ( parse_bool(s) ) + { + case 0: + break; + default: + if ( !strcmp(s, "bts") ) + vpmu_features |= XENPMU_FEATURE_INTEL_BTS; + else if ( *s ) + { + printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); + break; + } + /* fall through */ + case 1: + /* Default VPMU mode */ + vpmu_mode = XENPMU_MODE_SELF; + opt_vpmu_enabled = 1; + break; + } +} + +void vpmu_lvtpc_update(uint32_t val) +{ + struct vpmu_struct *vpmu; + struct vcpu *curr; + + if ( vpmu_mode == XENPMU_MODE_OFF ) + return; + + curr = current; + vpmu = vcpu_vpmu(curr); + + vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED); + + /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */ + if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data || + !(vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) ) + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); +} + +int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, + uint64_t supported, bool_t is_write) +{ + struct vcpu *curr; + struct vpmu_struct *vpmu; + const struct arch_vpmu_ops *ops; + int ret = 0; + + if ( (vpmu_mode == XENPMU_MODE_OFF) || + ((vpmu_mode & XENPMU_MODE_ALL) && + !is_hardware_domain(current->domain)) ) + goto nop; + + curr = current; + vpmu = vcpu_vpmu(curr); + ops = vpmu->arch_vpmu_ops; + if ( !ops ) + goto nop; + + if ( is_write && ops->do_wrmsr ) + ret = ops->do_wrmsr(msr, *msr_content, supported); + else if ( !is_write && ops->do_rdmsr ) + ret = ops->do_rdmsr(msr, msr_content); + else + goto nop; + + /* + * We may have received a PMU interrupt while handling MSR access + * and since do_wr/rdmsr may load VPMU 
context we should save + * (and unload) it again. + */ + if ( !is_hvm_vcpu(curr) && + vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) ) + { + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + ops->arch_vpmu_save(curr); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + } + + return ret; + + nop: + if ( !is_write ) + *msr_content = 0; + + return 0; +} + +static inline struct vcpu *choose_hwdom_vcpu(void) +{ + unsigned idx; + + if ( hardware_domain->max_vcpus == 0 ) + return NULL; + + idx = smp_processor_id() % hardware_domain->max_vcpus; + + return hardware_domain->vcpu[idx]; +} + +void vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + struct vcpu *sampled = current, *sampling; + struct vpmu_struct *vpmu; + + /* + * dom0 will handle interrupt for special domains (e.g. idle domain) or, + * in XENPMU_MODE_ALL, for everyone. + */ + if ( (vpmu_mode & XENPMU_MODE_ALL) || + (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) ) + { + sampling = choose_hwdom_vcpu(); + if ( !sampling ) + return; + } + else + sampling = sampled; + + vpmu = vcpu_vpmu(sampling); + if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) ) + { + /* PV(H) guest */ + const struct cpu_user_regs *cur_regs; + uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags; + uint32_t domid; + + if ( !vpmu->xenpmu_data ) + return; + + if ( is_pvh_vcpu(sampling) && + !(vpmu_mode & XENPMU_MODE_ALL) && + !vpmu->arch_vpmu_ops->do_interrupt(regs) ) + return; + + if ( *flags & PMU_CACHED ) + return; + + /* PV guest will be reading PMU MSRs from xenpmu_data */ + vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + vpmu->arch_vpmu_ops->arch_vpmu_save(sampling); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + + if ( has_hvm_container_vcpu(sampled) ) + *flags = 0; + else + *flags = PMU_SAMPLE_PV; + + if ( sampled == sampling ) + domid = DOMID_SELF; + else + domid = sampled->domain->domain_id; + + /* Store appropriate registers in xenpmu_data */ + /* FIXME: 32-bit PVH should go 
here as well */ + if ( is_pv_32bit_vcpu(sampling) ) + { + /* + * 32-bit dom0 cannot process Xen's addresses (which are 64 bit) + * and therefore we treat it the same way as a non-privileged + * PV 32-bit domain. + */ + struct compat_pmu_regs *cmp; + + cur_regs = guest_cpu_user_regs(); + + cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs; + cmp->ip = cur_regs->rip; + cmp->sp = cur_regs->rsp; + cmp->flags = cur_regs->eflags; + cmp->ss = cur_regs->ss; + cmp->cs = cur_regs->cs; + if ( (cmp->cs & 3) > 1 ) + *flags |= PMU_SAMPLE_USER; + } + else + { + struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs; + + if ( (vpmu_mode & XENPMU_MODE_SELF) ) + cur_regs = guest_cpu_user_regs(); + else if ( !guest_mode(regs) && + is_hardware_domain(sampling->domain) ) + { + cur_regs = regs; + domid = DOMID_XEN; + } + else + cur_regs = guest_cpu_user_regs(); + + r->ip = cur_regs->rip; + r->sp = cur_regs->rsp; + r->flags = cur_regs->eflags; + + if ( !has_hvm_container_vcpu(sampled) ) + { + r->ss = cur_regs->ss; + r->cs = cur_regs->cs; + if ( !(sampled->arch.flags & TF_kernel_mode) ) + *flags |= PMU_SAMPLE_USER; + } + else + { + struct segment_register seg; + + hvm_get_segment_register(sampled, x86_seg_cs, &seg); + r->cs = seg.sel; + hvm_get_segment_register(sampled, x86_seg_ss, &seg); + r->ss = seg.sel; + r->cpl = seg.attr.fields.dpl; + if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ) + *flags |= PMU_SAMPLE_REAL; + } + } + + vpmu->xenpmu_data->domain_id = domid; + vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id; + vpmu->xenpmu_data->pcpu_id = smp_processor_id(); + + vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); + *flags |= PMU_CACHED; + + send_guest_vcpu_virq(sampling, VIRQ_XENPMU); + + return; + } + + if ( vpmu->arch_vpmu_ops ) + { + struct vlapic *vlapic = vcpu_vlapic(sampling); + u32 vlapic_lvtpc; + + /* We don't support (yet) HVM dom0 */ + ASSERT(sampling == sampled); + + if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) || + 
!is_vlapic_lvtpc_enabled(vlapic) ) + return; + + vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); + + switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) ) + { + case APIC_MODE_FIXED: + vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0); + break; + case APIC_MODE_NMI: + sampling->nmi_pending = 1; + break; + } + } +} + +void vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid ) + vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx); +} + +static void vpmu_save_force(void *arg) +{ + struct vcpu *v = (struct vcpu *)arg; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return; + + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + + if ( vpmu->arch_vpmu_ops ) + (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); + + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE); + + per_cpu(last_vcpu, smp_processor_id()) = NULL; +} + +void vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); + + if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) + return; + + vpmu->last_pcpu = pcpu; + per_cpu(last_vcpu, pcpu) = v; + + if ( vpmu->arch_vpmu_ops ) + if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + + apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); +} + +void vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); + struct vcpu *prev = NULL; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + /* First time this VCPU is running here */ + if ( vpmu->last_pcpu != pcpu ) + { + /* + * Get the context from last pcpu that we ran on. Note that if another + * VCPU is running there it must have saved this VPCU's context before + * startig to run (see below). 
+ * There should be no race since remote pcpu will disable interrupts + * before saving the context. + */ + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + on_selected_cpus(cpumask_of(vpmu->last_pcpu), + vpmu_save_force, (void *)v, 1); + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + } + } + + /* Prevent forced context save from remote CPU */ + local_irq_disable(); + + prev = per_cpu(last_vcpu, pcpu); + + if ( prev != v && prev ) + { + vpmu = vcpu_vpmu(prev); + + /* Someone ran here before us */ + vpmu_save_force(prev); + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + + vpmu = vcpu_vpmu(v); + } + + local_irq_enable(); + + /* Only when PMU is counting, we load PMU context immediately. */ + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || + (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && + (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED)) ) + return; + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) + { + apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); + /* Arch code needs to set VPMU_CONTEXT_LOADED */ + vpmu->arch_vpmu_ops->arch_vpmu_load(v); + } +} + +void vpmu_initialise(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t vendor = current_cpu_data.x86_vendor; + int ret; + + BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ); + BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ); + BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ); + BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ); + + ASSERT(!vpmu->flags && !vpmu->context); + + if ( v->domain != hardware_domain ) + { + spin_lock(&vpmu_lock); + vpmu_count++; /* Prevent vpmu_mode from changing until we are done */ + spin_unlock(&vpmu_lock); + } + + switch ( vendor ) + { + case X86_VENDOR_AMD: + ret = svm_vpmu_initialise(v); + break; + + case X86_VENDOR_INTEL: + ret = vmx_vpmu_initialise(v); + break; + + default: + if ( vpmu_mode != XENPMU_MODE_OFF ) + { + printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. 
" + "Disabling VPMU\n", vendor); + opt_vpmu_enabled = 0; + vpmu_mode = XENPMU_MODE_OFF; + } + return; /* Don't bother restoring vpmu_count, VPMU is off forever */ + } + + if ( ret ) + printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v); + + /* Intel needs to initialize VPMU ops even if VPMU is not in use */ + if ( (v->domain != hardware_domain) && + (ret || (vpmu_mode == XENPMU_MODE_OFF)) ) + { + spin_lock(&vpmu_lock); + vpmu_count--; + spin_unlock(&vpmu_lock); + } +} + +static void vpmu_clear_last(void *arg) +{ + if ( this_cpu(last_vcpu) == arg ) + this_cpu(last_vcpu) = NULL; +} + +void vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + /* + * Need to clear last_vcpu in case it points to v. + * We can check here non-atomically whether it is 'v' since + * last_vcpu can never become 'v' again at this point. + * We will test it again in vpmu_clear_last() with interrupts + * disabled to make sure we don't clear someone else. + */ + if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v ) + on_selected_cpus(cpumask_of(vpmu->last_pcpu), + vpmu_clear_last, v, 1); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy ) + { + /* Unload VPMU first. 
This will stop counters */ + on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu), + vpmu_save_force, v, 1); + vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); + } + + spin_lock(&vpmu_lock); + if ( v->domain != hardware_domain ) + vpmu_count--; + spin_unlock(&vpmu_lock); +} + +static int pvpmu_init(struct domain *d, xen_pmu_params_t *params) +{ + struct vcpu *v; + struct vpmu_struct *vpmu; + struct page_info *page; + uint64_t gfn = params->val; + + if ( (vpmu_mode == XENPMU_MODE_OFF) || + ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) + return -EINVAL; + + if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) || + (d->vcpu[params->vcpu] == NULL) ) + return -EINVAL; + + page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC); + if ( !page ) + return -EINVAL; + + if ( !get_page_type(page, PGT_writable_page) ) + { + put_page(page); + return -EINVAL; + } + + v = d->vcpu[params->vcpu]; + vpmu = vcpu_vpmu(v); + + spin_lock(&vpmu->vpmu_lock); + + if ( v->arch.vpmu.xenpmu_data ) + { + put_page_and_type(page); + spin_unlock(&vpmu->vpmu_lock); + return -EEXIST; + } + + v->arch.vpmu.xenpmu_data = __map_domain_page_global(page); + if ( !v->arch.vpmu.xenpmu_data ) + { + put_page_and_type(page); + spin_unlock(&vpmu->vpmu_lock); + return -ENOMEM; + } + + vpmu_initialise(v); + + spin_unlock(&vpmu->vpmu_lock); + + return 0; +} + +static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params) +{ + struct vcpu *v; + struct vpmu_struct *vpmu; + uint64_t mfn; + + if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) || + (d->vcpu[params->vcpu] == NULL) ) + return; + + v = d->vcpu[params->vcpu]; + if ( v != current ) + vcpu_pause(v); + + vpmu = vcpu_vpmu(v); + spin_lock(&vpmu->vpmu_lock); + + vpmu_destroy(v); + + if ( v->arch.vpmu.xenpmu_data ) + { + mfn = domain_page_map_to_mfn(v->arch.vpmu.xenpmu_data); + ASSERT(mfn != 0); + unmap_domain_page_global(v->arch.vpmu.xenpmu_data); + put_page_and_type(mfn_to_page(mfn)); + v->arch.vpmu.xenpmu_data = NULL; + } + + 
spin_unlock(&vpmu->vpmu_lock); + + if ( v != current ) + vcpu_unpause(v); +} + +/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */ +void vpmu_dump(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump ) + vpmu->arch_vpmu_ops->arch_vpmu_dump(v); +} + +long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) +{ + int ret; + struct vcpu *curr; + struct xen_pmu_params pmu_params = {.val = 0}; + struct xen_pmu_data *xenpmu_data; + + if ( !opt_vpmu_enabled ) + return -EOPNOTSUPP; + + ret = xsm_pmu_op(XSM_OTHER, current->domain, op); + if ( ret ) + return ret; + + /* Check major version when parameters are specified */ + switch ( op ) + { + case XENPMU_mode_set: + case XENPMU_feature_set: + case XENPMU_init: + case XENPMU_finish: + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + + if ( pmu_params.version.maj != XENPMU_VER_MAJ ) + return -EINVAL; + } + + switch ( op ) + { + case XENPMU_mode_set: + { + if ( (pmu_params.val & + ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) || + (hweight64(pmu_params.val) > 1) ) + return -EINVAL; + + /* 32-bit dom0 can only sample itself. */ + if ( is_pv_32bit_vcpu(current) && + (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) ) + return -EINVAL; + + spin_lock(&vpmu_lock); + + /* + * We can always safely switch between XENPMU_MODE_SELF and + * XENPMU_MODE_HV while other VPMUs are active. 
+ */ + if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) || + ((vpmu_mode ^ pmu_params.val) == + (XENPMU_MODE_SELF | XENPMU_MODE_HV)) ) + vpmu_mode = pmu_params.val; + else + { + printk(XENLOG_WARNING "VPMU: Cannot change mode while" + " active VPMUs exist\n"); + ret = -EBUSY; + } + + spin_unlock(&vpmu_lock); + + break; + } + + case XENPMU_mode_get: + memset(&pmu_params, 0, sizeof(pmu_params)); + pmu_params.val = vpmu_mode; + + pmu_params.version.maj = XENPMU_VER_MAJ; + pmu_params.version.min = XENPMU_VER_MIN; + + if ( copy_to_guest(arg, &pmu_params, 1) ) + return -EFAULT; + + break; + + case XENPMU_feature_set: + if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS ) + return -EINVAL; + + spin_lock(&vpmu_lock); + + if ( vpmu_count == 0 ) + vpmu_features = pmu_params.val; + else + { + printk(XENLOG_WARNING "VPMU: Cannot change features while" + " active VPMUs exist\n"); + ret = -EBUSY; + } + + spin_unlock(&vpmu_lock); + + break; + + case XENPMU_feature_get: + pmu_params.val = vpmu_features; + if ( copy_field_to_guest(arg, &pmu_params, val) ) + return -EFAULT; + + break; + + case XENPMU_init: + ret = pvpmu_init(current->domain, &pmu_params); + break; + + case XENPMU_finish: + pvpmu_finish(current->domain, &pmu_params); + break; + + case XENPMU_lvtpc_set: + curr = current; + xenpmu_data = curr->arch.vpmu.xenpmu_data; + if ( xenpmu_data == NULL ) + return -EINVAL; + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc); + break; + + case XENPMU_flush: + curr = current; + xenpmu_data = curr->arch.vpmu.xenpmu_data; + if ( xenpmu_data == NULL ) + return -EINVAL; + xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED; + vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc); + vpmu_load(curr); + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static int __init vpmu_init(void) +{ + int vendor = current_cpu_data.x86_vendor; + + if ( !opt_vpmu_enabled ) + { + printk(XENLOG_INFO "VPMU: disabled\n"); + return 0; + } + + /* NMI watchdog uses LVTPC and HW counter */ + if ( 
opt_watchdog && opt_vpmu_enabled ) + { + printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n"); + opt_vpmu_enabled = 0; + vpmu_mode = XENPMU_MODE_OFF; + return 0; + } + + switch ( vendor ) + { + case X86_VENDOR_AMD: + if ( amd_vpmu_init() ) + vpmu_mode = XENPMU_MODE_OFF; + break; + case X86_VENDOR_INTEL: + if ( core2_vpmu_init() ) + vpmu_mode = XENPMU_MODE_OFF; + break; + default: + printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. " + "Turning VPMU off.\n", vendor); + vpmu_mode = XENPMU_MODE_OFF; + break; + } + + if ( vpmu_mode != XENPMU_MODE_OFF ) + printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "." + __stringify(XENPMU_VER_MIN) "\n"); + else + opt_vpmu_enabled = 0; + + return 0; +} +__initcall(vpmu_init); diff --git a/xen/arch/x86/cpu/vpmu_amd.c b/xen/arch/x86/cpu/vpmu_amd.c new file mode 100644 index 0000000..ebe1970 --- /dev/null +++ b/xen/arch/x86/cpu/vpmu_amd.c @@ -0,0 +1,498 @@ +/* + * vpmu.c: PMU virtualization for HVM domain. + * + * Copyright (c) 2010, Advanced Micro Devices, Inc. + * Parts of this code are Copyright (c) 2007, Intel Corporation + * + * Author: Wei Wang + * Tested by: Suravee Suthikulpanit + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MSR_F10H_EVNTSEL_GO_SHIFT 40 +#define MSR_F10H_EVNTSEL_EN_SHIFT 22 +#define MSR_F10H_COUNTER_LENGTH 48 + +#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) +#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT)) +#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) +#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1)))) + +static unsigned int __read_mostly num_counters; +static const u32 __read_mostly *counters; +static const u32 __read_mostly *ctrls; +static bool_t __read_mostly k7_counters_mirrored; + +#define F10H_NUM_COUNTERS 4 +#define F15H_NUM_COUNTERS 6 + +/* PMU Counter MSRs. */ +static const u32 AMD_F10H_COUNTERS[] = { + MSR_K7_PERFCTR0, + MSR_K7_PERFCTR1, + MSR_K7_PERFCTR2, + MSR_K7_PERFCTR3 +}; + +/* PMU Control MSRs. */ +static const u32 AMD_F10H_CTRLS[] = { + MSR_K7_EVNTSEL0, + MSR_K7_EVNTSEL1, + MSR_K7_EVNTSEL2, + MSR_K7_EVNTSEL3 +}; + +static const u32 AMD_F15H_COUNTERS[] = { + MSR_AMD_FAM15H_PERFCTR0, + MSR_AMD_FAM15H_PERFCTR1, + MSR_AMD_FAM15H_PERFCTR2, + MSR_AMD_FAM15H_PERFCTR3, + MSR_AMD_FAM15H_PERFCTR4, + MSR_AMD_FAM15H_PERFCTR5 +}; + +static const u32 AMD_F15H_CTRLS[] = { + MSR_AMD_FAM15H_EVNTSEL0, + MSR_AMD_FAM15H_EVNTSEL1, + MSR_AMD_FAM15H_EVNTSEL2, + MSR_AMD_FAM15H_EVNTSEL3, + MSR_AMD_FAM15H_EVNTSEL4, + MSR_AMD_FAM15H_EVNTSEL5 +}; + +/* Use private context as a flag for MSR bitmap */ +#define msr_bitmap_on(vpmu) do { \ + (vpmu)->priv_context = (void *)-1L; \ + } while (0) +#define msr_bitmap_off(vpmu) do { \ + (vpmu)->priv_context = NULL; \ + } while (0) +#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL) + +static inline int get_pmu_reg_type(u32 addr) +{ + if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) ) + return MSR_TYPE_CTRL; + + if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) ) + return MSR_TYPE_COUNTER; + + 
if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) && + (addr <= MSR_AMD_FAM15H_PERFCTR5 ) ) + { + if (addr & 1) + return MSR_TYPE_COUNTER; + else + return MSR_TYPE_CTRL; + } + + /* unsupported registers */ + return -1; +} + +static inline u32 get_fam15h_addr(u32 addr) +{ + switch ( addr ) + { + case MSR_K7_PERFCTR0: + return MSR_AMD_FAM15H_PERFCTR0; + case MSR_K7_PERFCTR1: + return MSR_AMD_FAM15H_PERFCTR1; + case MSR_K7_PERFCTR2: + return MSR_AMD_FAM15H_PERFCTR2; + case MSR_K7_PERFCTR3: + return MSR_AMD_FAM15H_PERFCTR3; + case MSR_K7_EVNTSEL0: + return MSR_AMD_FAM15H_EVNTSEL0; + case MSR_K7_EVNTSEL1: + return MSR_AMD_FAM15H_EVNTSEL1; + case MSR_K7_EVNTSEL2: + return MSR_AMD_FAM15H_EVNTSEL2; + case MSR_K7_EVNTSEL3: + return MSR_AMD_FAM15H_EVNTSEL3; + default: + break; + } + + return addr; +} + +static void amd_vpmu_set_msr_bitmap(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + for ( i = 0; i < num_counters; i++ ) + { + svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE); + svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE); + } + + msr_bitmap_on(vpmu); +} + +static void amd_vpmu_unset_msr_bitmap(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + for ( i = 0; i < num_counters; i++ ) + { + svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW); + svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW); + } + + msr_bitmap_off(vpmu); +} + +static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + return 1; +} + +static inline void context_load(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + + for ( i = 0; i < num_counters; i++ ) + { + wrmsrl(counters[i], counter_regs[i]); + wrmsrl(ctrls[i], ctrl_regs[i]); + } +} + +static void amd_vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct 
xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + + vpmu_reset(vpmu, VPMU_FROZEN); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + unsigned int i; + + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], ctrl_regs[i]); + + return; + } + + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + + context_load(v); +} + +static inline void context_save(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + + /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */ + for ( i = 0; i < num_counters; i++ ) + rdmsrl(counters[i], counter_regs[i]); +} + +static int amd_vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + unsigned int i; + + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], 0); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + { + vpmu_set(vpmu, VPMU_FROZEN); + return 0; + } + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return 0; + + context_save(v); + + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && + has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) ) + amd_vpmu_unset_msr_bitmap(v); + + return 1; +} + +static void context_update(unsigned int msr, u64 msr_content) +{ + unsigned int i; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + + if ( k7_counters_mirrored && + ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) ) + { + msr = get_fam15h_addr(msr); + } + + for ( i = 0; i < num_counters; i++ ) + { + if ( msr == ctrls[i] ) + { + ctrl_regs[i] = msr_content; + return; + } + else if (msr == counters[i] ) + { + counter_regs[i] = msr_content; + return; + } + } +} + +static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, + uint64_t supported) 
+{ + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + ASSERT(!supported); + + /* For all counters, enable guest only mode for HVM guest */ + if ( has_hvm_container_vcpu(v) && + (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + !is_guest_mode(msr_content) ) + { + set_guest_mode(msr_content); + } + + /* check if the first counter is enabled */ + if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) + return 0; + vpmu_set(vpmu, VPMU_RUNNING); + + if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) ) + amd_vpmu_set_msr_bitmap(v); + } + + /* stop saving & restore if guest stops first counter */ + if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + vpmu_reset(vpmu, VPMU_RUNNING); + if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) ) + amd_vpmu_unset_msr_bitmap(v); + release_pmu_ownship(PMU_OWNER_HVM); + } + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) + { + context_load(v); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); + } + + /* Update vpmu context immediately */ + context_update(msr, msr_content); + + /* Write to hw counters */ + wrmsrl(msr, msr_content); + return 0; +} + +static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) + { + context_load(v); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); + } + + rdmsrl(msr, *msr_content); + + return 0; +} + +static void amd_vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( has_hvm_container_vcpu(v) ) + { + if ( is_msr_bitmap_on(vpmu) ) + amd_vpmu_unset_msr_bitmap(v); + + if ( is_hvm_vcpu(v) ) + 
xfree(vpmu->context); + + release_pmu_ownship(PMU_OWNER_HVM); + } + + vpmu->context = NULL; + vpmu_clear(vpmu); +} + +/* VPMU part of the 'q' keyhandler */ +static void amd_vpmu_dump(const struct vcpu *v) +{ + const struct vpmu_struct *vpmu = vcpu_vpmu(v); + const struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + const uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + const uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + unsigned int i; + + printk(" VPMU state: 0x%x ", vpmu->flags); + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + { + printk("\n"); + return; + } + + printk("("); + if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) ) + printk("PASSIVE_DOMAIN_ALLOCATED, "); + if ( vpmu_is_set(vpmu, VPMU_FROZEN) ) + printk("FROZEN, "); + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + printk("SAVE, "); + if ( vpmu_is_set(vpmu, VPMU_RUNNING) ) + printk("RUNNING, "); + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + printk("LOADED, "); + printk("ALLOCATED)\n"); + + for ( i = 0; i < num_counters; i++ ) + { + uint64_t ctrl, cntr; + + rdmsrl(ctrls[i], ctrl); + rdmsrl(counters[i], cntr); + printk(" %#x: %#lx (%#lx in HW) %#x: %#lx (%#lx in HW)\n", + ctrls[i], ctrl_regs[i], ctrl, + counters[i], counter_regs[i], cntr); + } +} + +struct arch_vpmu_ops amd_vpmu_ops = { + .do_wrmsr = amd_vpmu_do_wrmsr, + .do_rdmsr = amd_vpmu_do_rdmsr, + .do_interrupt = amd_vpmu_do_interrupt, + .arch_vpmu_destroy = amd_vpmu_destroy, + .arch_vpmu_save = amd_vpmu_save, + .arch_vpmu_load = amd_vpmu_load, + .arch_vpmu_dump = amd_vpmu_dump +}; + +int svm_vpmu_initialise(struct vcpu *v) +{ + struct xen_pmu_amd_ctxt *ctxt; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu_mode == XENPMU_MODE_OFF ) + return 0; + + if ( !counters ) + return -EINVAL; + + if ( is_hvm_vcpu(v) ) + { + ctxt = xzalloc_bytes(sizeof(*ctxt) + + 2 * sizeof(uint64_t) * num_counters); + if ( !ctxt ) + { + printk(XENLOG_G_WARNING "%pv: Insufficient memory for PMU, " + " PMU feature is unavailable\n", v); + 
return -ENOMEM; + } + } + else + ctxt = &v->arch.vpmu.xenpmu_data->pmu.c.amd; + + ctxt->counters = sizeof(*ctxt); + ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters; + + vpmu->context = ctxt; + vpmu->priv_context = NULL; + + vpmu->arch_vpmu_ops = &amd_vpmu_ops; + + vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); + return 0; +} + +int __init amd_vpmu_init(void) +{ + switch ( current_cpu_data.x86 ) + { + case 0x15: + num_counters = F15H_NUM_COUNTERS; + counters = AMD_F15H_COUNTERS; + ctrls = AMD_F15H_CTRLS; + k7_counters_mirrored = 1; + break; + case 0x10: + case 0x12: + case 0x14: + case 0x16: + num_counters = F10H_NUM_COUNTERS; + counters = AMD_F10H_COUNTERS; + ctrls = AMD_F10H_CTRLS; + k7_counters_mirrored = 0; + break; + default: + printk(XENLOG_WARNING "VPMU: Unsupported CPU family %#x\n", + current_cpu_data.x86); + return -EINVAL; + } + + if ( sizeof(struct xen_pmu_data) + + 2 * sizeof(uint64_t) * num_counters > PAGE_SIZE ) + { + printk(XENLOG_WARNING + "VPMU: Register bank does not fit into VPMU shared page\n"); + counters = ctrls = NULL; + num_counters = 0; + return -ENOSPC; + } + + return 0; +} + diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c new file mode 100644 index 0000000..31dfdca --- /dev/null +++ b/xen/arch/x86/cpu/vpmu_intel.c @@ -0,0 +1,939 @@ +/* + * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Haitao Shan + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID + * instruction. + * cpuid 0xa - Architectural Performance Monitoring Leaf + * Register eax + */ +#define PMU_VERSION_SHIFT 0 /* Version ID */ +#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */ +#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT) + +#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */ +#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */ +#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT) + +#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */ +#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */ +#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT) +/* Register edx */ +#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */ +#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */ +#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT) + +#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */ +#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */ +#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT) + +/* Alias registers (0x4c1) for full-width writes to PMCs */ +#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0)) +static bool_t __read_mostly full_width_write; + +/* Intel-specific VPMU features */ +#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */ +#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */ + +/* + * 
MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed + * counters. 4 bits for every counter. + */ +#define FIXED_CTR_CTRL_BITS 4 +#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1) + +/* Number of general-purpose and fixed performance counters */ +static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt; + +/* + * QUIRK to workaround an issue on various family 6 cpus. + * The issue leads to endless PMC interrupt loops on the processor. + * If the interrupt handler is running and a pmc reaches the value 0, this + * value remains forever and it triggers immediately a new interrupt after + * finishing the handler. + * A workaround is to read all flagged counters and if the value is 0 write + * 1 (or another value != 0) into it. + * There exist no errata and the real cause of this behaviour is unknown. + */ +bool_t __read_mostly is_pmc_quirk; + +static void check_pmc_quirk(void) +{ + if ( current_cpu_data.x86 == 6 ) + is_pmc_quirk = 1; + else + is_pmc_quirk = 0; +} + +static void handle_pmc_quirk(u64 msr_content) +{ + int i; + u64 val; + + if ( !is_pmc_quirk ) + return; + + val = msr_content; + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + if ( val & 0x1 ) + { + u64 cnt; + rdmsrl(MSR_P6_PERFCTR(i), cnt); + if ( cnt == 0 ) + wrmsrl(MSR_P6_PERFCTR(i), 1); + } + val >>= 1; + } + val = msr_content >> 32; + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + if ( val & 0x1 ) + { + u64 cnt; + rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt); + if ( cnt == 0 ) + wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1); + } + val >>= 1; + } +} + +/* + * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15] + */ +static int core2_get_arch_pmc_count(void) +{ + u32 eax; + + eax = cpuid_eax(0xa); + return MASK_EXTR(eax, PMU_GENERAL_NR_MASK); +} + +/* + * Read the number of fixed counters via CPUID.EAX[0xa].EDX[0..4] + */ +static int core2_get_fixed_pmc_count(void) +{ + u32 edx; + + edx = cpuid_edx(0xa); + return MASK_EXTR(edx, PMU_FIXED_NR_MASK); +} + +/* edx bits 5-12:
Bit width of fixed-function performance counters */ +static int core2_get_bitwidth_fix_count(void) +{ + u32 edx; + + edx = cpuid_edx(0xa); + return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK); +} + +static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index) +{ + u32 msr_index_pmc; + + switch ( msr_index ) + { + case MSR_CORE_PERF_FIXED_CTR_CTRL: + case MSR_IA32_DS_AREA: + case MSR_IA32_PEBS_ENABLE: + *type = MSR_TYPE_CTRL; + return 1; + + case MSR_CORE_PERF_GLOBAL_CTRL: + case MSR_CORE_PERF_GLOBAL_STATUS: + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + *type = MSR_TYPE_GLOBAL; + return 1; + + default: + + if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) && + (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) ) + { + *index = msr_index - MSR_CORE_PERF_FIXED_CTR0; + *type = MSR_TYPE_COUNTER; + return 1; + } + + if ( (msr_index >= MSR_P6_EVNTSEL(0)) && + (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) ) + { + *index = msr_index - MSR_P6_EVNTSEL(0); + *type = MSR_TYPE_ARCH_CTRL; + return 1; + } + + msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK; + if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) && + (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) ) + { + *type = MSR_TYPE_ARCH_COUNTER; + *index = msr_index_pmc - MSR_IA32_PERFCTR0; + return 1; + } + return 0; + } +} + +static inline int msraddr_to_bitpos(int x) +{ + ASSERT(x == (x & 0x1fff)); + return x; +} + +static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap) +{ + int i; + + /* Allow Read/Write PMU Counters MSR Directly. 
*/ + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), + msr_bitmap + 0x800/BYTES_PER_LONG); + + if ( full_width_write ) + { + clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + } + + /* Allow Read PMU Non-global Controls Directly. */ + for ( i = 0; i < arch_pmc_cnt; i++ ) + clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap); + + clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); +} + +static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap) +{ + int i; + + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + + if ( full_width_write ) + { + set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + } + + for ( i = 0; i < arch_pmc_cnt; i++ ) + set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap); + + set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); +} + +static 
inline void __core2_vpmu_save(struct vcpu *v) +{ + int i; + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; + uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + + for ( i = 0; i < fixed_pmc_cnt; i++ ) + rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); + for ( i = 0; i < arch_pmc_cnt; i++ ) + rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter); + + if ( !has_hvm_container_vcpu(v) ) + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status); +} + +static int core2_vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !has_hvm_container_vcpu(v) ) + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) ) + return 0; + + __core2_vpmu_save(v); + + /* Unset PMU MSR bitmap to trap lazy load. */ + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && + has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap ) + core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); + + return 1; +} + +static inline void __core2_vpmu_load(struct vcpu *v) +{ + unsigned int i, pmc_start; + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; + uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + + for ( i = 0; i < fixed_pmc_cnt; i++ ) + wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); + + if ( full_width_write ) + pmc_start = MSR_IA32_A_PERFCTR0; + else + pmc_start = MSR_IA32_PERFCTR0; + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter); + wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control); + } + + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl); + wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area); + wrmsrl(MSR_IA32_PEBS_ENABLE, 
core2_vpmu_cxt->pebs_enable); + + if ( !has_hvm_container_vcpu(v) ) + { + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl); + core2_vpmu_cxt->global_ovf_ctrl = 0; + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); + } +} + +static void core2_vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return; + + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + + __core2_vpmu_load(v); +} + +static int core2_vpmu_alloc_resource(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; + uint64_t *p = NULL; + + p = xzalloc(uint64_t); + if ( !p ) + goto out_err; + + if ( has_hvm_container_vcpu(v) ) + { + if ( is_hvm_vcpu(v) && !acquire_pmu_ownership(PMU_OWNER_HVM) ) + goto out_err; + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) + goto out_err_hvm; + if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) + goto out_err_hvm; + vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + } + + if ( is_hvm_vcpu(v) ) + { + core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) + + sizeof(uint64_t) * fixed_pmc_cnt + + sizeof(struct xen_pmu_cntr_pair) * + arch_pmc_cnt); + if ( !core2_vpmu_cxt ) + goto out_err_hvm; + } + else + core2_vpmu_cxt = &v->arch.vpmu.xenpmu_data->pmu.c.intel; + + core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt); + core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters + + sizeof(uint64_t) * fixed_pmc_cnt; + + vpmu->context = core2_vpmu_cxt; + vpmu->priv_context = p; + + vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); + + return 1; + + out_err_hvm: + xfree(core2_vpmu_cxt); + if ( is_hvm_vcpu(v) ) + release_pmu_ownship(PMU_OWNER_HVM); + + out_err: + xfree(p); + + printk("Failed to allocate VPMU resources for domain %u vcpu %u\n", + v->domain->domain_id, v->vcpu_id); + + return 0; +} + +static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
+{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( !is_core2_vpmu_msr(msr_index, type, index) ) + return 0; + + if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) && + !core2_vpmu_alloc_resource(current) ) + return 0; + + /* Do the lazy load stuff. */ + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + __core2_vpmu_load(current); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + if ( has_hvm_container_vcpu(current) && + cpu_has_vmx_msr_bitmap ) + core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap); + } + return 1; +} + +static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, + uint64_t supported) +{ + int i, tmp; + int type = -1, index = -1; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt; + uint64_t *enabled_cntrs; + + if ( !core2_vpmu_msr_common_check(msr, &type, &index) ) + { + /* Special handling for BTS */ + if ( msr == MSR_IA32_DEBUGCTLMSR ) + { + supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS | + IA32_DEBUGCTLMSR_BTINT; + + if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) + supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS | + IA32_DEBUGCTLMSR_BTS_OFF_USR; + if ( !(msr_content & ~supported) && + vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) + return 0; + if ( (msr_content & supported) && + !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) + printk(XENLOG_G_WARNING + "%pv: Debug Store unsupported on this CPU\n", + current); + } + return 1; + } + + ASSERT(!supported); + + if ( type == MSR_TYPE_COUNTER && + (msr_content & + ~((1ull << core2_get_bitwidth_fix_count()) - 1)) ) + /* Writing unsupported bits to a fixed counter */ + return 1; + + core2_vpmu_cxt = vpmu->context; + enabled_cntrs = vpmu->priv_context; + switch ( msr ) + { + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + if ( msr_content & ~(0xC000000000000000 | + (((1ULL << fixed_pmc_cnt) - 1) << 32) | + ((1ULL << arch_pmc_cnt) - 1)) ) + return 1; + core2_vpmu_cxt->global_status &= ~msr_content; +
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); + return 0; + case MSR_CORE_PERF_GLOBAL_STATUS: + gdprintk(XENLOG_INFO, "Can not write readonly MSR: " + "MSR_PERF_GLOBAL_STATUS(0x38E)!\n"); + return 1; + case MSR_IA32_PEBS_ENABLE: + if ( msr_content & 1 ) + gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, " + "which is not supported.\n"); + core2_vpmu_cxt->pebs_enable = msr_content; + return 0; + case MSR_IA32_DS_AREA: + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) + { + if ( !is_canonical_address(msr_content) ) + { + gdprintk(XENLOG_WARNING, + "Illegal address for IA32_DS_AREA: %#" PRIx64 "\n", + msr_content); + return 1; + } + core2_vpmu_cxt->ds_area = msr_content; + break; + } + gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n"); + return 0; + case MSR_CORE_PERF_GLOBAL_CTRL: + core2_vpmu_cxt->global_ctrl = msr_content; + break; + case MSR_CORE_PERF_FIXED_CTR_CTRL: + if ( msr_content & + ( ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) ) + return 1; + + if ( has_hvm_container_vcpu(v) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, + &core2_vpmu_cxt->global_ctrl); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); + *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32); + if ( msr_content != 0 ) + { + u64 val = msr_content; + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + if ( val & 3 ) + *enabled_cntrs |= (1ULL << 32) << i; + val >>= FIXED_CTR_CTRL_BITS; + } + } + + core2_vpmu_cxt->fixed_ctrl = msr_content; + break; + default: + tmp = msr - MSR_P6_EVNTSEL(0); + if ( tmp >= 0 && tmp < arch_pmc_cnt ) + { + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + + if ( msr_content & (~((1ull << 32) - 1)) ) + return 1; + + if ( has_hvm_container_vcpu(v) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, + &core2_vpmu_cxt->global_ctrl); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); + + if ( msr_content & (1ULL << 22) ) + *enabled_cntrs |= 1ULL
<< tmp; + else + *enabled_cntrs &= ~(1ULL << tmp); + + xen_pmu_cntr_pair[tmp].control = msr_content; + } + } + + if ( type != MSR_TYPE_GLOBAL ) + wrmsrl(msr, msr_content); + else + { + if ( has_hvm_container_vcpu(v) ) + vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + else + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + } + + if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) || + (core2_vpmu_cxt->ds_area != 0) ) + vpmu_set(vpmu, VPMU_RUNNING); + else + vpmu_reset(vpmu, VPMU_RUNNING); + + return 0; +} + +static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + int type = -1, index = -1; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt; + + if ( core2_vpmu_msr_common_check(msr, &type, &index) ) + { + core2_vpmu_cxt = vpmu->context; + switch ( msr ) + { + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + *msr_content = 0; + break; + case MSR_CORE_PERF_GLOBAL_STATUS: + *msr_content = core2_vpmu_cxt->global_status; + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if ( has_hvm_container_vcpu(v) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content); + break; + default: + rdmsrl(msr, *msr_content); + } + } + else if ( msr == MSR_IA32_MISC_ENABLE ) + { + /* Extension for BTS */ + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) + *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; + } + + return 0; +} + +static void core2_vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + if (input == 0x1) + { + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) + { + /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */ + *edx |= cpufeat_mask(X86_FEATURE_DS); + if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) ) + *ecx |= cpufeat_mask(X86_FEATURE_DTES64); + if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) + *ecx |=
cpufeat_mask(X86_FEATURE_DSCPL); + } + } +} + +/* Dump vpmu info on console, called in the context of keyhandler 'q'. */ +static void core2_vpmu_dump(const struct vcpu *v) +{ + const struct vpmu_struct *vpmu = vcpu_vpmu(v); + unsigned int i; + const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context; + u64 val; + uint64_t *fixed_counters; + struct xen_pmu_cntr_pair *cntr_pair; + + if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + printk(" vPMU loaded\n"); + else + printk(" vPMU allocated\n"); + return; + } + + printk(" vPMU running\n"); + + cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); + + /* Print the contents of the counter and its configuration msr. */ + for ( i = 0; i < arch_pmc_cnt; i++ ) + printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n", + i, cntr_pair[i].counter, cntr_pair[i].control); + + /* + * The configuration of the fixed counter is 4 bits each in the + * MSR_CORE_PERF_FIXED_CTR_CTRL. + */ + val = core2_vpmu_cxt->fixed_ctrl; + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + printk(" fixed_%d: 0x%016lx ctrl: %#lx\n", + i, fixed_counters[i], + val & FIXED_CTR_CTRL_MASK); + val >>= FIXED_CTR_CTRL_BITS; + } +} + +static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + struct vcpu *v = current; + u64 msr_content; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content); + if ( msr_content ) + { + if ( is_pmc_quirk ) + handle_pmc_quirk(msr_content); + core2_vpmu_cxt->global_status |= msr_content; + msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1); + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); + } + else + { + /* No PMC overflow but perhaps a Trace Message interrupt. 
*/ + __vmread(GUEST_IA32_DEBUGCTL, &msr_content); + if ( !(msr_content & IA32_DEBUGCTLMSR_TR) ) + return 0; + } + + return 1; +} + +static void core2_vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( has_hvm_container_vcpu(v) ) + { + if ( cpu_has_vmx_msr_bitmap ) + core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); + + if ( is_hvm_vcpu(v) ) + xfree(vpmu->context); + + release_pmu_ownship(PMU_OWNER_HVM); + } + + xfree(vpmu->priv_context); + vpmu->context = NULL; + vpmu_clear(vpmu); +} + +struct arch_vpmu_ops core2_vpmu_ops = { + .do_wrmsr = core2_vpmu_do_wrmsr, + .do_rdmsr = core2_vpmu_do_rdmsr, + .do_interrupt = core2_vpmu_do_interrupt, + .do_cpuid = core2_vpmu_do_cpuid, + .arch_vpmu_destroy = core2_vpmu_destroy, + .arch_vpmu_save = core2_vpmu_save, + .arch_vpmu_load = core2_vpmu_load, + .arch_vpmu_dump = core2_vpmu_dump +}; + +static void core2_no_vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* + * As in this case the vpmu is not enabled reset some bits in the + * architectural performance monitoring related part. + */ + if ( input == 0xa ) + { + *eax &= ~PMU_VERSION_MASK; + *eax &= ~PMU_GENERAL_NR_MASK; + *eax &= ~PMU_GENERAL_WIDTH_MASK; + + *edx &= ~PMU_FIXED_NR_MASK; + *edx &= ~PMU_FIXED_WIDTH_MASK; + } +} + +/* + * If its a vpmu msr set it to 0. + */ +static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + int type = -1, index = -1; + if ( !is_core2_vpmu_msr(msr, &type, &index) ) + return 1; + *msr_content = 0; + return 0; +} + +/* + * These functions are used in case vpmu is not enabled. 
+ */ +struct arch_vpmu_ops core2_no_vpmu_ops = { + .do_rdmsr = core2_no_vpmu_do_rdmsr, + .do_cpuid = core2_no_vpmu_do_cpuid, +}; + +int vmx_vpmu_initialise(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + u64 msr_content; + static bool_t ds_warned; + + vpmu->arch_vpmu_ops = &core2_no_vpmu_ops; + if ( vpmu_mode == XENPMU_MODE_OFF ) + return 0; + + if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 ) + return -EINVAL; + + if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) ) + goto func_out; + /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */ + while ( boot_cpu_has(X86_FEATURE_DS) ) + { + if ( !boot_cpu_has(X86_FEATURE_DTES64) ) + { + if ( !ds_warned ) + printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area" + " - Debug Store disabled for guests\n"); + break; + } + vpmu_set(vpmu, VPMU_CPU_HAS_DS); + rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); + if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL ) + { + /* If BTS_UNAVAIL is set reset the DS feature. */ + vpmu_reset(vpmu, VPMU_CPU_HAS_DS); + if ( !ds_warned ) + printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL" + " - Debug Store disabled for guests\n"); + break; + } + + vpmu_set(vpmu, VPMU_CPU_HAS_BTS); + if ( !ds_warned ) + { + if ( !boot_cpu_has(X86_FEATURE_DSCPL) ) + printk(XENLOG_G_INFO + "vpmu: CPU doesn't support CPL-Qualified BTS\n"); + printk("******************************************************\n"); + printk("** WARNING: Emulation of BTS Feature is switched on **\n"); + printk("** Using this processor feature in a virtualized **\n"); + printk("** environment is not 100%% safe. **\n"); + printk("** Setting the DS buffer address with wrong values **\n"); + printk("** may lead to hypervisor hangs or crashes. **\n"); + printk("** It is NOT recommended for production use! 
**\n"); + printk("******************************************************\n"); + } + break; + } + ds_warned = 1; + func_out: + + /* PV domains can allocate resources immediately */ + if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) ) + return -EIO; + + vpmu->arch_vpmu_ops = &core2_vpmu_ops; + + return 0; +} + +int __init core2_vpmu_init(void) +{ + u64 caps; + + if ( current_cpu_data.x86 != 6 ) + { + printk(XENLOG_WARNING "VPMU: only family 6 is supported\n"); + return -EINVAL; + } + + switch ( current_cpu_data.x86_model ) + { + /* Core2: */ + case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 0x1d: /* six-core 45 nm xeon "Dunnington" */ + + case 0x2a: /* SandyBridge */ + case 0x2d: /* SandyBridge, "Romley-EP" */ + + /* Nehalem: */ + case 0x1a: /* 45 nm nehalem, "Bloomfield" */ + case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */ + case 0x2e: /* 45 nm nehalem-ex, "Beckton" */ + + /* Westmere: */ + case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */ + case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */ + case 0x27: /* 32 nm Westmere-EX */ + + case 0x3a: /* IvyBridge */ + case 0x3e: /* IvyBridge EP */ + + /* Haswell: */ + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + + /* future: */ + case 0x3d: + case 0x4e: + break; + default: + printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n", + current_cpu_data.x86_model); + return -EINVAL; + } + + arch_pmc_cnt = core2_get_arch_pmc_count(); + fixed_pmc_cnt = core2_get_fixed_pmc_count(); + rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps); + full_width_write = (caps >> 13) & 1; + + check_pmc_quirk(); + + if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt + + sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE ) + { + printk(XENLOG_WARNING + "VPMU: Register bank does not fit into VPMU share 
page\n"); + arch_pmc_cnt = fixed_pmc_cnt = 0; + return -ENOSPC; + } + + return 0; +} diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile index eea5555..742b83b 100644 --- a/xen/arch/x86/hvm/Makefile +++ b/xen/arch/x86/hvm/Makefile @@ -22,4 +22,3 @@ obj-y += vlapic.o obj-y += vmsi.o obj-y += vpic.o obj-y += vpt.o -obj-y += vpmu.o \ No newline at end of file diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile index a10a55e..760d295 100644 --- a/xen/arch/x86/hvm/svm/Makefile +++ b/xen/arch/x86/hvm/svm/Makefile @@ -6,4 +6,3 @@ obj-y += nestedsvm.o obj-y += svm.o obj-y += svmdebug.o obj-y += vmcb.o -obj-y += vpmu.o diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c deleted file mode 100644 index 0997901..0000000 --- a/xen/arch/x86/hvm/svm/vpmu.c +++ /dev/null @@ -1,498 +0,0 @@ -/* - * vpmu.c: PMU virtualization for HVM domain. - * - * Copyright (c) 2010, Advanced Micro Devices, Inc. - * Parts of this code are Copyright (c) 2007, Intel Corporation - * - * Author: Wei Wang - * Tested by: Suravee Suthikulpanit - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MSR_F10H_EVNTSEL_GO_SHIFT 40 -#define MSR_F10H_EVNTSEL_EN_SHIFT 22 -#define MSR_F10H_COUNTER_LENGTH 48 - -#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) -#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT)) -#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) -#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1)))) - -static unsigned int __read_mostly num_counters; -static const u32 __read_mostly *counters; -static const u32 __read_mostly *ctrls; -static bool_t __read_mostly k7_counters_mirrored; - -#define F10H_NUM_COUNTERS 4 -#define F15H_NUM_COUNTERS 6 - -/* PMU Counter MSRs. */ -static const u32 AMD_F10H_COUNTERS[] = { - MSR_K7_PERFCTR0, - MSR_K7_PERFCTR1, - MSR_K7_PERFCTR2, - MSR_K7_PERFCTR3 -}; - -/* PMU Control MSRs. */ -static const u32 AMD_F10H_CTRLS[] = { - MSR_K7_EVNTSEL0, - MSR_K7_EVNTSEL1, - MSR_K7_EVNTSEL2, - MSR_K7_EVNTSEL3 -}; - -static const u32 AMD_F15H_COUNTERS[] = { - MSR_AMD_FAM15H_PERFCTR0, - MSR_AMD_FAM15H_PERFCTR1, - MSR_AMD_FAM15H_PERFCTR2, - MSR_AMD_FAM15H_PERFCTR3, - MSR_AMD_FAM15H_PERFCTR4, - MSR_AMD_FAM15H_PERFCTR5 -}; - -static const u32 AMD_F15H_CTRLS[] = { - MSR_AMD_FAM15H_EVNTSEL0, - MSR_AMD_FAM15H_EVNTSEL1, - MSR_AMD_FAM15H_EVNTSEL2, - MSR_AMD_FAM15H_EVNTSEL3, - MSR_AMD_FAM15H_EVNTSEL4, - MSR_AMD_FAM15H_EVNTSEL5 -}; - -/* Use private context as a flag for MSR bitmap */ -#define msr_bitmap_on(vpmu) do { \ - (vpmu)->priv_context = (void *)-1L; \ - } while (0) -#define msr_bitmap_off(vpmu) do { \ - (vpmu)->priv_context = NULL; \ - } while (0) -#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL) - -static inline int get_pmu_reg_type(u32 addr) -{ - if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) ) - return MSR_TYPE_CTRL; - - if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) ) - return MSR_TYPE_COUNTER; - - 
if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) && - (addr <= MSR_AMD_FAM15H_PERFCTR5 ) ) - { - if (addr & 1) - return MSR_TYPE_COUNTER; - else - return MSR_TYPE_CTRL; - } - - /* unsupported registers */ - return -1; -} - -static inline u32 get_fam15h_addr(u32 addr) -{ - switch ( addr ) - { - case MSR_K7_PERFCTR0: - return MSR_AMD_FAM15H_PERFCTR0; - case MSR_K7_PERFCTR1: - return MSR_AMD_FAM15H_PERFCTR1; - case MSR_K7_PERFCTR2: - return MSR_AMD_FAM15H_PERFCTR2; - case MSR_K7_PERFCTR3: - return MSR_AMD_FAM15H_PERFCTR3; - case MSR_K7_EVNTSEL0: - return MSR_AMD_FAM15H_EVNTSEL0; - case MSR_K7_EVNTSEL1: - return MSR_AMD_FAM15H_EVNTSEL1; - case MSR_K7_EVNTSEL2: - return MSR_AMD_FAM15H_EVNTSEL2; - case MSR_K7_EVNTSEL3: - return MSR_AMD_FAM15H_EVNTSEL3; - default: - break; - } - - return addr; -} - -static void amd_vpmu_set_msr_bitmap(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - for ( i = 0; i < num_counters; i++ ) - { - svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE); - svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE); - } - - msr_bitmap_on(vpmu); -} - -static void amd_vpmu_unset_msr_bitmap(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - for ( i = 0; i < num_counters; i++ ) - { - svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW); - svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW); - } - - msr_bitmap_off(vpmu); -} - -static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - return 1; -} - -static inline void context_load(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - - for ( i = 0; i < num_counters; i++ ) - { - wrmsrl(counters[i], counter_regs[i]); - wrmsrl(ctrls[i], ctrl_regs[i]); - } -} - -static void amd_vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct 
xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - - vpmu_reset(vpmu, VPMU_FROZEN); - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - unsigned int i; - - for ( i = 0; i < num_counters; i++ ) - wrmsrl(ctrls[i], ctrl_regs[i]); - - return; - } - - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - - context_load(v); -} - -static inline void context_save(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - - /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */ - for ( i = 0; i < num_counters; i++ ) - rdmsrl(counters[i], counter_regs[i]); -} - -static int amd_vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - unsigned int i; - - for ( i = 0; i < num_counters; i++ ) - wrmsrl(ctrls[i], 0); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) - { - vpmu_set(vpmu, VPMU_FROZEN); - return 0; - } - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - return 0; - - context_save(v); - - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && - has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) ) - amd_vpmu_unset_msr_bitmap(v); - - return 1; -} - -static void context_update(unsigned int msr, u64 msr_content) -{ - unsigned int i; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - - if ( k7_counters_mirrored && - ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) ) - { - msr = get_fam15h_addr(msr); - } - - for ( i = 0; i < num_counters; i++ ) - { - if ( msr == ctrls[i] ) - { - ctrl_regs[i] = msr_content; - return; - } - else if (msr == counters[i] ) - { - counter_regs[i] = msr_content; - return; - } - } -} - -static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, - uint64_t supported) 
-{ - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - ASSERT(!supported); - - /* For all counters, enable guest only mode for HVM guest */ - if ( has_hvm_container_vcpu(v) && - (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - !is_guest_mode(msr_content) ) - { - set_guest_mode(msr_content); - } - - /* check if the first counter is enabled */ - if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) - return 0; - vpmu_set(vpmu, VPMU_RUNNING); - - if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) ) - amd_vpmu_set_msr_bitmap(v); - } - - /* stop saving & restore if guest stops first counter */ - if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - vpmu_reset(vpmu, VPMU_RUNNING); - if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) ) - amd_vpmu_unset_msr_bitmap(v); - release_pmu_ownship(PMU_OWNER_HVM); - } - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) - || vpmu_is_set(vpmu, VPMU_FROZEN) ) - { - context_load(v); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - vpmu_reset(vpmu, VPMU_FROZEN); - } - - /* Update vpmu context immediately */ - context_update(msr, msr_content); - - /* Write to hw counters */ - wrmsrl(msr, msr_content); - return 0; -} - -static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) - || vpmu_is_set(vpmu, VPMU_FROZEN) ) - { - context_load(v); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - vpmu_reset(vpmu, VPMU_FROZEN); - } - - rdmsrl(msr, *msr_content); - - return 0; -} - -static void amd_vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( has_hvm_container_vcpu(v) ) - { - if ( is_msr_bitmap_on(vpmu) ) - amd_vpmu_unset_msr_bitmap(v); - - if ( is_hvm_vcpu(v) ) - 
xfree(vpmu->context); - - release_pmu_ownship(PMU_OWNER_HVM); - } - - vpmu->context = NULL; - vpmu_clear(vpmu); -} - -/* VPMU part of the 'q' keyhandler */ -static void amd_vpmu_dump(const struct vcpu *v) -{ - const struct vpmu_struct *vpmu = vcpu_vpmu(v); - const struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - const uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - const uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - unsigned int i; - - printk(" VPMU state: 0x%x ", vpmu->flags); - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - { - printk("\n"); - return; - } - - printk("("); - if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) ) - printk("PASSIVE_DOMAIN_ALLOCATED, "); - if ( vpmu_is_set(vpmu, VPMU_FROZEN) ) - printk("FROZEN, "); - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) - printk("SAVE, "); - if ( vpmu_is_set(vpmu, VPMU_RUNNING) ) - printk("RUNNING, "); - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - printk("LOADED, "); - printk("ALLOCATED)\n"); - - for ( i = 0; i < num_counters; i++ ) - { - uint64_t ctrl, cntr; - - rdmsrl(ctrls[i], ctrl); - rdmsrl(counters[i], cntr); - printk(" %#x: %#lx (%#lx in HW) %#x: %#lx (%#lx in HW)\n", - ctrls[i], ctrl_regs[i], ctrl, - counters[i], counter_regs[i], cntr); - } -} - -struct arch_vpmu_ops amd_vpmu_ops = { - .do_wrmsr = amd_vpmu_do_wrmsr, - .do_rdmsr = amd_vpmu_do_rdmsr, - .do_interrupt = amd_vpmu_do_interrupt, - .arch_vpmu_destroy = amd_vpmu_destroy, - .arch_vpmu_save = amd_vpmu_save, - .arch_vpmu_load = amd_vpmu_load, - .arch_vpmu_dump = amd_vpmu_dump -}; - -int svm_vpmu_initialise(struct vcpu *v) -{ - struct xen_pmu_amd_ctxt *ctxt; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu_mode == XENPMU_MODE_OFF ) - return 0; - - if ( !counters ) - return -EINVAL; - - if ( is_hvm_vcpu(v) ) - { - ctxt = xzalloc_bytes(sizeof(*ctxt) + - 2 * sizeof(uint64_t) * num_counters); - if ( !ctxt ) - { - printk(XENLOG_G_WARNING "%pv: Insufficient memory for PMU, " - " PMU feature is unavailable\n", v); - 
return -ENOMEM; - } - } - else - ctxt = &v->arch.vpmu.xenpmu_data->pmu.c.amd; - - ctxt->counters = sizeof(*ctxt); - ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters; - - vpmu->context = ctxt; - vpmu->priv_context = NULL; - - vpmu->arch_vpmu_ops = &amd_vpmu_ops; - - vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); - return 0; -} - -int __init amd_vpmu_init(void) -{ - switch ( current_cpu_data.x86 ) - { - case 0x15: - num_counters = F15H_NUM_COUNTERS; - counters = AMD_F15H_COUNTERS; - ctrls = AMD_F15H_CTRLS; - k7_counters_mirrored = 1; - break; - case 0x10: - case 0x12: - case 0x14: - case 0x16: - num_counters = F10H_NUM_COUNTERS; - counters = AMD_F10H_COUNTERS; - ctrls = AMD_F10H_CTRLS; - k7_counters_mirrored = 0; - break; - default: - printk(XENLOG_WARNING "VPMU: Unsupported CPU family %#x\n", - current_cpu_data.x86); - return -EINVAL; - } - - if ( sizeof(struct xen_pmu_data) + - 2 * sizeof(uint64_t) * num_counters > PAGE_SIZE ) - { - printk(XENLOG_WARNING - "VPMU: Register bank does not fit into VPMU shared page\n"); - counters = ctrls = NULL; - num_counters = 0; - return -ENOSPC; - } - - return 0; -} - diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index cee8699..56b168c 100644 --- a/xen/arch/x86/hvm/vlapic.c +++ b/xen/arch/x86/hvm/vlapic.c @@ -33,12 +33,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile index 373b3d9..04a29ce 100644 --- a/xen/arch/x86/hvm/vmx/Makefile +++ b/xen/arch/x86/hvm/vmx/Makefile @@ -3,5 +3,4 @@ obj-y += intr.o obj-y += realmode.o obj-y += vmcs.o obj-y += vmx.o -obj-y += vpmu_core2.o obj-y += vvmx.o diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c deleted file mode 100644 index 66d7bc0..0000000 --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c +++ /dev/null @@ -1,939 +0,0 @@ -/* - * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain. 
- * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID - * instruction. 
- * cpuid 0xa - Architectural Performance Monitoring Leaf - * Register eax - */ -#define PMU_VERSION_SHIFT 0 /* Version ID */ -#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */ -#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT) - -#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */ -#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */ -#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT) - -#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */ -#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */ -#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT) -/* Register edx */ -#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */ -#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */ -#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT) - -#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */ -#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */ -#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT) - -/* Alias registers (0x4c1) for full-width writes to PMCs */ -#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0)) -static bool_t __read_mostly full_width_write; - -/* Intel-specific VPMU features */ -#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */ -#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */ - -/* - * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed - * counters. 4 bits for every counter. - */ -#define FIXED_CTR_CTRL_BITS 4 -#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1) - -/* Number of general-purpose and fixed performance counters */ -static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt; - -/* - * QUIRK to workaround an issue on various family 6 cpus. - * The issue leads to endless PMC interrupt loops on the processor. 
- * If the interrupt handler is running and a pmc reaches the value 0, this - * value remains forever and it triggers immediately a new interrupt after - * finishing the handler. - * A workaround is to read all flagged counters and if the value is 0 write - * 1 (or another value != 0) into it. - * There exist no errata and the real cause of this behaviour is unknown. - */ -bool_t __read_mostly is_pmc_quirk; - -static void check_pmc_quirk(void) -{ - if ( current_cpu_data.x86 == 6 ) - is_pmc_quirk = 1; - else - is_pmc_quirk = 0; -} - -static void handle_pmc_quirk(u64 msr_content) -{ - int i; - u64 val; - - if ( !is_pmc_quirk ) - return; - - val = msr_content; - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - if ( val & 0x1 ) - { - u64 cnt; - rdmsrl(MSR_P6_PERFCTR(i), cnt); - if ( cnt == 0 ) - wrmsrl(MSR_P6_PERFCTR(i), 1); - } - val >>= 1; - } - val = msr_content >> 32; - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - if ( val & 0x1 ) - { - u64 cnt; - rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt); - if ( cnt == 0 ) - wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1); - } - val >>= 1; - } -} - -/* - * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15] - */ -static int core2_get_arch_pmc_count(void) -{ - u32 eax; - - eax = cpuid_eax(0xa); - return MASK_EXTR(eax, PMU_GENERAL_NR_MASK); -} - -/* - * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4] - */ -static int core2_get_fixed_pmc_count(void) -{ - u32 eax; - - eax = cpuid_eax(0xa); - return MASK_EXTR(eax, PMU_FIXED_NR_MASK); -} - -/* edx bits 5-12: Bit width of fixed-function performance counters */ -static int core2_get_bitwidth_fix_count(void) -{ - u32 edx; - - edx = cpuid_edx(0xa); - return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK); -} - -static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index) -{ - u32 msr_index_pmc; - - switch ( msr_index ) - { - case MSR_CORE_PERF_FIXED_CTR_CTRL: - case MSR_IA32_DS_AREA: - case MSR_IA32_PEBS_ENABLE: - *type = MSR_TYPE_CTRL; - return 1; - - case 
MSR_CORE_PERF_GLOBAL_CTRL: - case MSR_CORE_PERF_GLOBAL_STATUS: - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - *type = MSR_TYPE_GLOBAL; - return 1; - - default: - - if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) && - (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) ) - { - *index = msr_index - MSR_CORE_PERF_FIXED_CTR0; - *type = MSR_TYPE_COUNTER; - return 1; - } - - if ( (msr_index >= MSR_P6_EVNTSEL(0)) && - (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) ) - { - *index = msr_index - MSR_P6_EVNTSEL(0); - *type = MSR_TYPE_ARCH_CTRL; - return 1; - } - - msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK; - if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) && - (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) ) - { - *type = MSR_TYPE_ARCH_COUNTER; - *index = msr_index_pmc - MSR_IA32_PERFCTR0; - return 1; - } - return 0; - } -} - -static inline int msraddr_to_bitpos(int x) -{ - ASSERT(x == (x & 0x1fff)); - return x; -} - -static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap) -{ - int i; - - /* Allow Read/Write PMU Counters MSR Directly. */ - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), - msr_bitmap + 0x800/BYTES_PER_LONG); - - if ( full_width_write ) - { - clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - } - - /* Allow Read PMU Non-global Controls Directly. 
*/ - for ( i = 0; i < arch_pmc_cnt; i++ ) - clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap); - - clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); -} - -static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap) -{ - int i; - - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - - if ( full_width_write ) - { - set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - } - - for ( i = 0; i < arch_pmc_cnt; i++ ) - set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap); - - set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); -} - -static inline void __core2_vpmu_save(struct vcpu *v) -{ - int i; - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; - uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); - struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = - vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - - for ( i = 0; i < fixed_pmc_cnt; i++ ) - rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); - for ( i = 0; i < arch_pmc_cnt; i++ ) - rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter); - - if ( !has_hvm_container_vcpu(v) ) - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status); -} - -static int core2_vpmu_save(struct vcpu *v) -{ - struct 
vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !has_hvm_container_vcpu(v) ) - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) ) - return 0; - - __core2_vpmu_save(v); - - /* Unset PMU MSR bitmap to trap lazy load. */ - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && - has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap ) - core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); - - return 1; -} - -static inline void __core2_vpmu_load(struct vcpu *v) -{ - unsigned int i, pmc_start; - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; - uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); - struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = - vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - - for ( i = 0; i < fixed_pmc_cnt; i++ ) - wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); - - if ( full_width_write ) - pmc_start = MSR_IA32_A_PERFCTR0; - else - pmc_start = MSR_IA32_PERFCTR0; - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter); - wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control); - } - - wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl); - wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area); - wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable); - - if ( !has_hvm_container_vcpu(v) ) - { - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl); - core2_vpmu_cxt->global_ovf_ctrl = 0; - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); - } -} - -static void core2_vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - return; - - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - - __core2_vpmu_load(v); -} - -static int core2_vpmu_alloc_resource(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; - uint64_t *p = NULL; - - p = xzalloc(uint64_t); - if ( !p 
) - goto out_err; - - if ( has_hvm_container_vcpu(v) ) - { - if ( is_hvm_vcpu(v) && !acquire_pmu_ownership(PMU_OWNER_HVM) ) - goto out_err; - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) - goto out_err_hvm; - if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) - goto out_err_hvm; - vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0); - } - - if ( is_hvm_vcpu(v) ) - { - core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) + - sizeof(uint64_t) * fixed_pmc_cnt + - sizeof(struct xen_pmu_cntr_pair) * - arch_pmc_cnt); - if ( !core2_vpmu_cxt ) - goto out_err_hvm; - } - else - core2_vpmu_cxt = &v->arch.vpmu.xenpmu_data->pmu.c.intel; - - core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt); - core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters + - sizeof(uint64_t) * fixed_pmc_cnt; - - vpmu->context = core2_vpmu_cxt; - vpmu->priv_context = p; - - vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); - - return 1; - - out_err_hvm: - xfree(core2_vpmu_cxt); - if ( is_hvm_vcpu(v) ) - release_pmu_ownship(PMU_OWNER_HVM); - - out_err: - xfree(p); - - printk("Failed to allocate VPMU resources for domain %u vcpu %u\n", - v->vcpu_id, v->domain->domain_id); - - return 0; -} - -static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( !is_core2_vpmu_msr(msr_index, type, index) ) - return 0; - - if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) && - !core2_vpmu_alloc_resource(current) ) - return 0; - - /* Do the lazy load staff. 
*/ - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - __core2_vpmu_load(current); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - if ( has_hvm_container_vcpu(current) && - cpu_has_vmx_msr_bitmap ) - core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap); - } - return 1; -} - -static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, - uint64_t supported) -{ - int i, tmp; - int type = -1, index = -1; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt; - uint64_t *enabled_cntrs; - - if ( !core2_vpmu_msr_common_check(msr, &type, &index) ) - { - /* Special handling for BTS */ - if ( msr == MSR_IA32_DEBUGCTLMSR ) - { - supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS | - IA32_DEBUGCTLMSR_BTINT; - - if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) - supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS | - IA32_DEBUGCTLMSR_BTS_OFF_USR; - if ( !(msr_content & ~supported) && - vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) - return 0; - if ( (msr_content & supported) && - !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) - printk(XENLOG_G_WARNING - "%pv: Debug Store unsupported on this CPU\n", - current); - } - return 1; - } - - ASSERT(!supported); - - if ( type == MSR_TYPE_COUNTER && - (msr_content & - ~((1ull << core2_get_bitwidth_fix_count()) - 1)) ) - /* Writing unsupported bits to a fixed counter */ - return 1; - - core2_vpmu_cxt = vpmu->context; - enabled_cntrs = vpmu->priv_context; - switch ( msr ) - { - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - if ( msr_content & ~(0xC000000000000000 | - (((1ULL << fixed_pmc_cnt) - 1) << 32) | - ((1ULL << arch_pmc_cnt) - 1)) ) - return 1; - core2_vpmu_cxt->global_status &= ~msr_content; - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); - return 0; - case MSR_CORE_PERF_GLOBAL_STATUS: - gdprintk(XENLOG_INFO, "Can not write readonly MSR: " - "MSR_PERF_GLOBAL_STATUS(0x38E)!\n"); - return 1; - case MSR_IA32_PEBS_ENABLE: - if ( msr_content & 1 ) - gdprintk(XENLOG_WARNING, "Guest is
trying to enable PEBS, " - "which is not supported.\n"); - core2_vpmu_cxt->pebs_enable = msr_content; - return 0; - case MSR_IA32_DS_AREA: - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) - { - if ( !is_canonical_address(msr_content) ) - { - gdprintk(XENLOG_WARNING, - "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n", - msr_content); - return 1; - } - core2_vpmu_cxt->ds_area = msr_content; - break; - } - gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n"); - return 0; - case MSR_CORE_PERF_GLOBAL_CTRL: - core2_vpmu_cxt->global_ctrl = msr_content; - break; - case MSR_CORE_PERF_FIXED_CTR_CTRL: - if ( msr_content & - ( ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) ) - return 1; - - if ( has_hvm_container_vcpu(v) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, - &core2_vpmu_cxt->global_ctrl); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); - *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32); - if ( msr_content != 0 ) - { - u64 val = msr_content; - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - if ( val & 3 ) - *enabled_cntrs |= (1ULL << 32) << i; - val >>= FIXED_CTR_CTRL_BITS; - } - } - - core2_vpmu_cxt->fixed_ctrl = msr_content; - break; - default: - tmp = msr - MSR_P6_EVNTSEL(0); - if ( tmp >= 0 && tmp < arch_pmc_cnt ) - { - struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = - vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - - if ( msr_content & (~((1ull << 32) - 1)) ) - return 1; - - if ( has_hvm_container_vcpu(v) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, - &core2_vpmu_cxt->global_ctrl); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); - - if ( msr_content & (1ULL << 22) ) - *enabled_cntrs |= 1ULL << tmp; - else - *enabled_cntrs &= ~(1ULL << tmp); - - xen_pmu_cntr_pair[tmp].control = msr_content; - } - } - - if ( type != MSR_TYPE_GLOBAL ) - wrmsrl(msr, msr_content); - else - { - if ( has_hvm_container_vcpu(v) ) - vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - else - 
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - } - - if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) || - (core2_vpmu_cxt->ds_area != 0) ) - vpmu_set(vpmu, VPMU_RUNNING); - else - vpmu_reset(vpmu, VPMU_RUNNING); - - return 0; -} - -static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - int type = -1, index = -1; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt; - - if ( core2_vpmu_msr_common_check(msr, &type, &index) ) - { - core2_vpmu_cxt = vpmu->context; - switch ( msr ) - { - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - *msr_content = 0; - break; - case MSR_CORE_PERF_GLOBAL_STATUS: - *msr_content = core2_vpmu_cxt->global_status; - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if ( has_hvm_container_vcpu(v) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content); - break; - default: - rdmsrl(msr, *msr_content); - } - } - else if ( msr == MSR_IA32_MISC_ENABLE ) - { - /* Extension for BTS */ - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) - *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; - } - - return 0; -} - -static void core2_vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - if (input == 0x1) - { - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) - { - /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */ - *edx |= cpufeat_mask(X86_FEATURE_DS); - if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) ) - *ecx |= cpufeat_mask(X86_FEATURE_DTES64); - if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) ) - *ecx |= cpufeat_mask(X86_FEATURE_DSCPL); - } - } -} - -/* Dump vpmu info on console, called in the context of keyhandler 'q'.
*/ -static void core2_vpmu_dump(const struct vcpu *v) -{ - const struct vpmu_struct *vpmu = vcpu_vpmu(v); - unsigned int i; - const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context; - u64 val; - uint64_t *fixed_counters; - struct xen_pmu_cntr_pair *cntr_pair; - - if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - printk(" vPMU loaded\n"); - else - printk(" vPMU allocated\n"); - return; - } - - printk(" vPMU running\n"); - - cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); - - /* Print the contents of the counter and its configuration msr. */ - for ( i = 0; i < arch_pmc_cnt; i++ ) - printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n", - i, cntr_pair[i].counter, cntr_pair[i].control); - - /* - * The configuration of the fixed counter is 4 bits each in the - * MSR_CORE_PERF_FIXED_CTR_CTRL. - */ - val = core2_vpmu_cxt->fixed_ctrl; - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - printk(" fixed_%d: 0x%016lx ctrl: %#lx\n", - i, fixed_counters[i], - val & FIXED_CTR_CTRL_MASK); - val >>= FIXED_CTR_CTRL_BITS; - } -} - -static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - u64 msr_content; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content); - if ( msr_content ) - { - if ( is_pmc_quirk ) - handle_pmc_quirk(msr_content); - core2_vpmu_cxt->global_status |= msr_content; - msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1); - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); - } - else - { - /* No PMC overflow but perhaps a Trace Message interrupt. 
*/ - __vmread(GUEST_IA32_DEBUGCTL, &msr_content); - if ( !(msr_content & IA32_DEBUGCTLMSR_TR) ) - return 0; - } - - return 1; -} - -static void core2_vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( has_hvm_container_vcpu(v) ) - { - if ( cpu_has_vmx_msr_bitmap ) - core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); - - if ( is_hvm_vcpu(v) ) - xfree(vpmu->context); - - release_pmu_ownship(PMU_OWNER_HVM); - } - - xfree(vpmu->priv_context); - vpmu->context = NULL; - vpmu_clear(vpmu); -} - -struct arch_vpmu_ops core2_vpmu_ops = { - .do_wrmsr = core2_vpmu_do_wrmsr, - .do_rdmsr = core2_vpmu_do_rdmsr, - .do_interrupt = core2_vpmu_do_interrupt, - .do_cpuid = core2_vpmu_do_cpuid, - .arch_vpmu_destroy = core2_vpmu_destroy, - .arch_vpmu_save = core2_vpmu_save, - .arch_vpmu_load = core2_vpmu_load, - .arch_vpmu_dump = core2_vpmu_dump -}; - -static void core2_no_vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* - * As in this case the vpmu is not enabled reset some bits in the - * architectural performance monitoring related part. - */ - if ( input == 0xa ) - { - *eax &= ~PMU_VERSION_MASK; - *eax &= ~PMU_GENERAL_NR_MASK; - *eax &= ~PMU_GENERAL_WIDTH_MASK; - - *edx &= ~PMU_FIXED_NR_MASK; - *edx &= ~PMU_FIXED_WIDTH_MASK; - } -} - -/* - * If its a vpmu msr set it to 0. - */ -static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - int type = -1, index = -1; - if ( !is_core2_vpmu_msr(msr, &type, &index) ) - return 1; - *msr_content = 0; - return 0; -} - -/* - * These functions are used in case vpmu is not enabled. 
- */ -struct arch_vpmu_ops core2_no_vpmu_ops = { - .do_rdmsr = core2_no_vpmu_do_rdmsr, - .do_cpuid = core2_no_vpmu_do_cpuid, -}; - -int vmx_vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - u64 msr_content; - static bool_t ds_warned; - - vpmu->arch_vpmu_ops = &core2_no_vpmu_ops; - if ( vpmu_mode == XENPMU_MODE_OFF ) - return 0; - - if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 ) - return -EINVAL; - - if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) ) - goto func_out; - /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */ - while ( boot_cpu_has(X86_FEATURE_DS) ) - { - if ( !boot_cpu_has(X86_FEATURE_DTES64) ) - { - if ( !ds_warned ) - printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area" - " - Debug Store disabled for guests\n"); - break; - } - vpmu_set(vpmu, VPMU_CPU_HAS_DS); - rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); - if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL ) - { - /* If BTS_UNAVAIL is set reset the DS feature. */ - vpmu_reset(vpmu, VPMU_CPU_HAS_DS); - if ( !ds_warned ) - printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL" - " - Debug Store disabled for guests\n"); - break; - } - - vpmu_set(vpmu, VPMU_CPU_HAS_BTS); - if ( !ds_warned ) - { - if ( !boot_cpu_has(X86_FEATURE_DSCPL) ) - printk(XENLOG_G_INFO - "vpmu: CPU doesn't support CPL-Qualified BTS\n"); - printk("******************************************************\n"); - printk("** WARNING: Emulation of BTS Feature is switched on **\n"); - printk("** Using this processor feature in a virtualized **\n"); - printk("** environment is not 100%% safe. **\n"); - printk("** Setting the DS buffer address with wrong values **\n"); - printk("** may lead to hypervisor hangs or crashes. **\n"); - printk("** It is NOT recommended for production use! 
**\n"); - printk("******************************************************\n"); - } - break; - } - ds_warned = 1; - func_out: - - /* PV domains can allocate resources immediately */ - if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) ) - return -EIO; - - vpmu->arch_vpmu_ops = &core2_vpmu_ops; - - return 0; -} - -int __init core2_vpmu_init(void) -{ - u64 caps; - - if ( current_cpu_data.x86 != 6 ) - { - printk(XENLOG_WARNING "VPMU: only family 6 is supported\n"); - return -EINVAL; - } - - switch ( current_cpu_data.x86_model ) - { - /* Core2: */ - case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 0x1d: /* six-core 45 nm xeon "Dunnington" */ - - case 0x2a: /* SandyBridge */ - case 0x2d: /* SandyBridge, "Romley-EP" */ - - /* Nehalem: */ - case 0x1a: /* 45 nm nehalem, "Bloomfield" */ - case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */ - case 0x2e: /* 45 nm nehalem-ex, "Beckton" */ - - /* Westmere: */ - case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */ - case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */ - case 0x27: /* 32 nm Westmere-EX */ - - case 0x3a: /* IvyBridge */ - case 0x3e: /* IvyBridge EP */ - - /* Haswell: */ - case 0x3c: - case 0x3f: - case 0x45: - case 0x46: - - /* future: */ - case 0x3d: - case 0x4e: - break; - default: - printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n", - current_cpu_data.x86_model); - return -EINVAL; - } - - arch_pmc_cnt = core2_get_arch_pmc_count(); - fixed_pmc_cnt = core2_get_fixed_pmc_count(); - rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps); - full_width_write = (caps >> 13) & 1; - - check_pmc_quirk(); - - if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt + - sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE ) - { - printk(XENLOG_WARNING - "VPMU: Register bank does not fit into VPMU share 
page\n"); - arch_pmc_cnt = fixed_pmc_cnt = 0; - return -ENOSPC; - } - - return 0; -} diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c deleted file mode 100644 index 71c5063..0000000 --- a/xen/arch/x86/hvm/vpmu.c +++ /dev/null @@ -1,791 +0,0 @@ -/* - * vpmu.c: PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -CHECK_pmu_params; -CHECK_pmu_intel_ctxt; -CHECK_pmu_amd_ctxt; -CHECK_pmu_cntr_pair; -CHECK_pmu_regs; - -/* - * "vpmu" : vpmu generally enabled - * "vpmu=off" : vpmu generally disabled - * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on. 
- */ -static unsigned int __read_mostly opt_vpmu_enabled; -unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF; -unsigned int __read_mostly vpmu_features = 0; -static void parse_vpmu_param(char *s); -custom_param("vpmu", parse_vpmu_param); - -static DEFINE_SPINLOCK(vpmu_lock); -static unsigned vpmu_count; - -static DEFINE_PER_CPU(struct vcpu *, last_vcpu); - -static void __init parse_vpmu_param(char *s) -{ - switch ( parse_bool(s) ) - { - case 0: - break; - default: - if ( !strcmp(s, "bts") ) - vpmu_features |= XENPMU_FEATURE_INTEL_BTS; - else if ( *s ) - { - printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); - break; - } - /* fall through */ - case 1: - /* Default VPMU mode */ - vpmu_mode = XENPMU_MODE_SELF; - opt_vpmu_enabled = 1; - break; - } -} - -void vpmu_lvtpc_update(uint32_t val) -{ - struct vpmu_struct *vpmu; - struct vcpu *curr; - - if ( vpmu_mode == XENPMU_MODE_OFF ) - return; - - curr = current; - vpmu = vcpu_vpmu(curr); - - vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED); - - /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */ - if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data || - !(vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) ) - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); -} - -int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, - uint64_t supported, bool_t is_write) -{ - struct vcpu *curr; - struct vpmu_struct *vpmu; - const struct arch_vpmu_ops *ops; - int ret = 0; - - if ( (vpmu_mode == XENPMU_MODE_OFF) || - ((vpmu_mode & XENPMU_MODE_ALL) && - !is_hardware_domain(current->domain)) ) - goto nop; - - curr = current; - vpmu = vcpu_vpmu(curr); - ops = vpmu->arch_vpmu_ops; - if ( !ops ) - goto nop; - - if ( is_write && ops->do_wrmsr ) - ret = ops->do_wrmsr(msr, *msr_content, supported); - else if ( !is_write && ops->do_rdmsr ) - ret = ops->do_rdmsr(msr, msr_content); - else - goto nop; - - /* - * We may have received a PMU interrupt while handling MSR access - * and since do_wr/rdmsr may load VPMU 
context we should save - * (and unload) it again. - */ - if ( !is_hvm_vcpu(curr) && - vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED) ) - { - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - ops->arch_vpmu_save(curr); - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - } - - return ret; - - nop: - if ( !is_write ) - *msr_content = 0; - - return 0; -} - -static inline struct vcpu *choose_hwdom_vcpu(void) -{ - unsigned idx; - - if ( hardware_domain->max_vcpus == 0 ) - return NULL; - - idx = smp_processor_id() % hardware_domain->max_vcpus; - - return hardware_domain->vcpu[idx]; -} - -void vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - struct vcpu *sampled = current, *sampling; - struct vpmu_struct *vpmu; - - /* - * dom0 will handle interrupt for special domains (e.g. idle domain) or, - * in XENPMU_MODE_ALL, for everyone. - */ - if ( (vpmu_mode & XENPMU_MODE_ALL) || - (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) ) - { - sampling = choose_hwdom_vcpu(); - if ( !sampling ) - return; - } - else - sampling = sampled; - - vpmu = vcpu_vpmu(sampling); - if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) ) - { - /* PV(H) guest */ - const struct cpu_user_regs *cur_regs; - uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags; - uint32_t domid; - - if ( !vpmu->xenpmu_data ) - return; - - if ( is_pvh_vcpu(sampling) && - !(vpmu_mode & XENPMU_MODE_ALL) && - !vpmu->arch_vpmu_ops->do_interrupt(regs) ) - return; - - if ( *flags & PMU_CACHED ) - return; - - /* PV guest will be reading PMU MSRs from xenpmu_data */ - vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - vpmu->arch_vpmu_ops->arch_vpmu_save(sampling); - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - - if ( has_hvm_container_vcpu(sampled) ) - *flags = 0; - else - *flags = PMU_SAMPLE_PV; - - if ( sampled == sampling ) - domid = DOMID_SELF; - else - domid = sampled->domain->domain_id; - - /* Store appropriate registers in xenpmu_data */ - /* FIXME: 32-bit PVH should go 
here as well */ - if ( is_pv_32bit_vcpu(sampling) ) - { - /* - * 32-bit dom0 cannot process Xen's addresses (which are 64 bit) - * and therefore we treat it the same way as a non-privileged - * PV 32-bit domain. - */ - struct compat_pmu_regs *cmp; - - cur_regs = guest_cpu_user_regs(); - - cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs; - cmp->ip = cur_regs->rip; - cmp->sp = cur_regs->rsp; - cmp->flags = cur_regs->eflags; - cmp->ss = cur_regs->ss; - cmp->cs = cur_regs->cs; - if ( (cmp->cs & 3) > 1 ) - *flags |= PMU_SAMPLE_USER; - } - else - { - struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs; - - if ( (vpmu_mode & XENPMU_MODE_SELF) ) - cur_regs = guest_cpu_user_regs(); - else if ( !guest_mode(regs) && - is_hardware_domain(sampling->domain) ) - { - cur_regs = regs; - domid = DOMID_XEN; - } - else - cur_regs = guest_cpu_user_regs(); - - r->ip = cur_regs->rip; - r->sp = cur_regs->rsp; - r->flags = cur_regs->eflags; - - if ( !has_hvm_container_vcpu(sampled) ) - { - r->ss = cur_regs->ss; - r->cs = cur_regs->cs; - if ( !(sampled->arch.flags & TF_kernel_mode) ) - *flags |= PMU_SAMPLE_USER; - } - else - { - struct segment_register seg; - - hvm_get_segment_register(sampled, x86_seg_cs, &seg); - r->cs = seg.sel; - hvm_get_segment_register(sampled, x86_seg_ss, &seg); - r->ss = seg.sel; - r->cpl = seg.attr.fields.dpl; - if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ) - *flags |= PMU_SAMPLE_REAL; - } - } - - vpmu->xenpmu_data->domain_id = domid; - vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id; - vpmu->xenpmu_data->pcpu_id = smp_processor_id(); - - vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); - *flags |= PMU_CACHED; - - send_guest_vcpu_virq(sampling, VIRQ_XENPMU); - - return; - } - - if ( vpmu->arch_vpmu_ops ) - { - struct vlapic *vlapic = vcpu_vlapic(sampling); - u32 vlapic_lvtpc; - - /* We don't support (yet) HVM dom0 */ - ASSERT(sampling == sampled); - - if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) || - 
!is_vlapic_lvtpc_enabled(vlapic) ) - return; - - vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); - - switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) ) - { - case APIC_MODE_FIXED: - vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0); - break; - case APIC_MODE_NMI: - sampling->nmi_pending = 1; - break; - } - } -} - -void vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid ) - vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx); -} - -static void vpmu_save_force(void *arg) -{ - struct vcpu *v = (struct vcpu *)arg; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - return; - - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - - if ( vpmu->arch_vpmu_ops ) - (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); - - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE); - - per_cpu(last_vcpu, smp_processor_id()) = NULL; -} - -void vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - int pcpu = smp_processor_id(); - - if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) - return; - - vpmu->last_pcpu = pcpu; - per_cpu(last_vcpu, pcpu) = v; - - if ( vpmu->arch_vpmu_ops ) - if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - - apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED); -} - -void vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - int pcpu = smp_processor_id(); - struct vcpu *prev = NULL; - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - /* First time this VCPU is running here */ - if ( vpmu->last_pcpu != pcpu ) - { - /* - * Get the context from last pcpu that we ran on. Note that if another - * VCPU is running there it must have saved this VPCU's context before - * startig to run (see below). 
- * There should be no race since remote pcpu will disable interrupts - * before saving the context. - */ - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - on_selected_cpus(cpumask_of(vpmu->last_pcpu), - vpmu_save_force, (void *)v, 1); - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - } - } - - /* Prevent forced context save from remote CPU */ - local_irq_disable(); - - prev = per_cpu(last_vcpu, pcpu); - - if ( prev != v && prev ) - { - vpmu = vcpu_vpmu(prev); - - /* Someone ran here before us */ - vpmu_save_force(prev); - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - - vpmu = vcpu_vpmu(v); - } - - local_irq_enable(); - - /* Only when PMU is counting, we load PMU context immediately. */ - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || - (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && - (vpmu->xenpmu_data->pmu.pmu_flags & PMU_CACHED)) ) - return; - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) - { - apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); - /* Arch code needs to set VPMU_CONTEXT_LOADED */ - vpmu->arch_vpmu_ops->arch_vpmu_load(v); - } -} - -void vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t vendor = current_cpu_data.x86_vendor; - int ret; - - BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ); - BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ); - BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ); - BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ); - - ASSERT(!vpmu->flags && !vpmu->context); - - if ( v->domain != hardware_domain ) - { - spin_lock(&vpmu_lock); - vpmu_count++; /* Prevent vpmu_mode from changing until we are done */ - spin_unlock(&vpmu_lock); - } - - switch ( vendor ) - { - case X86_VENDOR_AMD: - ret = svm_vpmu_initialise(v); - break; - - case X86_VENDOR_INTEL: - ret = vmx_vpmu_initialise(v); - break; - - default: - if ( vpmu_mode != XENPMU_MODE_OFF ) - { - printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. 
" - "Disabling VPMU\n", vendor); - opt_vpmu_enabled = 0; - vpmu_mode = XENPMU_MODE_OFF; - } - return; /* Don't bother restoring vpmu_count, VPMU is off forever */ - } - - if ( ret ) - printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v); - - /* Intel needs to initialize VPMU ops even if VPMU is not in use */ - if ( (v->domain != hardware_domain) && - (ret || (vpmu_mode == XENPMU_MODE_OFF)) ) - { - spin_lock(&vpmu_lock); - vpmu_count--; - spin_unlock(&vpmu_lock); - } -} - -static void vpmu_clear_last(void *arg) -{ - if ( this_cpu(last_vcpu) == arg ) - this_cpu(last_vcpu) = NULL; -} - -void vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - /* - * Need to clear last_vcpu in case it points to v. - * We can check here non-atomically whether it is 'v' since - * last_vcpu can never become 'v' again at this point. - * We will test it again in vpmu_clear_last() with interrupts - * disabled to make sure we don't clear someone else. - */ - if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v ) - on_selected_cpus(cpumask_of(vpmu->last_pcpu), - vpmu_clear_last, v, 1); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy ) - { - /* Unload VPMU first. 
This will stop counters */ - on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu), - vpmu_save_force, v, 1); - vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); - } - - spin_lock(&vpmu_lock); - if ( v->domain != hardware_domain ) - vpmu_count--; - spin_unlock(&vpmu_lock); -} - -static int pvpmu_init(struct domain *d, xen_pmu_params_t *params) -{ - struct vcpu *v; - struct vpmu_struct *vpmu; - struct page_info *page; - uint64_t gfn = params->val; - - if ( (vpmu_mode == XENPMU_MODE_OFF) || - ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) - return -EINVAL; - - if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) || - (d->vcpu[params->vcpu] == NULL) ) - return -EINVAL; - - page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC); - if ( !page ) - return -EINVAL; - - if ( !get_page_type(page, PGT_writable_page) ) - { - put_page(page); - return -EINVAL; - } - - v = d->vcpu[params->vcpu]; - vpmu = vcpu_vpmu(v); - - spin_lock(&vpmu->vpmu_lock); - - if ( v->arch.vpmu.xenpmu_data ) - { - put_page_and_type(page); - spin_unlock(&vpmu->vpmu_lock); - return -EEXIST; - } - - v->arch.vpmu.xenpmu_data = __map_domain_page_global(page); - if ( !v->arch.vpmu.xenpmu_data ) - { - put_page_and_type(page); - spin_unlock(&vpmu->vpmu_lock); - return -ENOMEM; - } - - vpmu_initialise(v); - - spin_unlock(&vpmu->vpmu_lock); - - return 0; -} - -static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params) -{ - struct vcpu *v; - struct vpmu_struct *vpmu; - uint64_t mfn; - - if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) || - (d->vcpu[params->vcpu] == NULL) ) - return; - - v = d->vcpu[params->vcpu]; - if ( v != current ) - vcpu_pause(v); - - vpmu = vcpu_vpmu(v); - spin_lock(&vpmu->vpmu_lock); - - vpmu_destroy(v); - - if ( v->arch.vpmu.xenpmu_data ) - { - mfn = domain_page_map_to_mfn(v->arch.vpmu.xenpmu_data); - ASSERT(mfn != 0); - unmap_domain_page_global(v->arch.vpmu.xenpmu_data); - put_page_and_type(mfn_to_page(mfn)); - v->arch.vpmu.xenpmu_data = NULL; - } - - 
spin_unlock(&vpmu->vpmu_lock); - - if ( v != current ) - vcpu_unpause(v); -} - -/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */ -void vpmu_dump(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump ) - vpmu->arch_vpmu_ops->arch_vpmu_dump(v); -} - -long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) -{ - int ret; - struct vcpu *curr; - struct xen_pmu_params pmu_params = {.val = 0}; - struct xen_pmu_data *xenpmu_data; - - if ( !opt_vpmu_enabled ) - return -EOPNOTSUPP; - - ret = xsm_pmu_op(XSM_OTHER, current->domain, op); - if ( ret ) - return ret; - - /* Check major version when parameters are specified */ - switch ( op ) - { - case XENPMU_mode_set: - case XENPMU_feature_set: - case XENPMU_init: - case XENPMU_finish: - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - - if ( pmu_params.version.maj != XENPMU_VER_MAJ ) - return -EINVAL; - } - - switch ( op ) - { - case XENPMU_mode_set: - { - if ( (pmu_params.val & - ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) || - (hweight64(pmu_params.val) > 1) ) - return -EINVAL; - - /* 32-bit dom0 can only sample itself. */ - if ( is_pv_32bit_vcpu(current) && - (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) ) - return -EINVAL; - - spin_lock(&vpmu_lock); - - /* - * We can always safely switch between XENPMU_MODE_SELF and - * XENPMU_MODE_HV while other VPMUs are active. 
- */ - if ( (vpmu_count == 0) || (vpmu_mode == pmu_params.val) || - ((vpmu_mode ^ pmu_params.val) == - (XENPMU_MODE_SELF | XENPMU_MODE_HV)) ) - vpmu_mode = pmu_params.val; - else - { - printk(XENLOG_WARNING "VPMU: Cannot change mode while" - " active VPMUs exist\n"); - ret = -EBUSY; - } - - spin_unlock(&vpmu_lock); - - break; - } - - case XENPMU_mode_get: - memset(&pmu_params, 0, sizeof(pmu_params)); - pmu_params.val = vpmu_mode; - - pmu_params.version.maj = XENPMU_VER_MAJ; - pmu_params.version.min = XENPMU_VER_MIN; - - if ( copy_to_guest(arg, &pmu_params, 1) ) - return -EFAULT; - - break; - - case XENPMU_feature_set: - if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS ) - return -EINVAL; - - spin_lock(&vpmu_lock); - - if ( vpmu_count == 0 ) - vpmu_features = pmu_params.val; - else - { - printk(XENLOG_WARNING "VPMU: Cannot change features while" - " active VPMUs exist\n"); - ret = -EBUSY; - } - - spin_unlock(&vpmu_lock); - - break; - - case XENPMU_feature_get: - pmu_params.val = vpmu_features; - if ( copy_field_to_guest(arg, &pmu_params, val) ) - return -EFAULT; - - break; - - case XENPMU_init: - ret = pvpmu_init(current->domain, &pmu_params); - break; - - case XENPMU_finish: - pvpmu_finish(current->domain, &pmu_params); - break; - - case XENPMU_lvtpc_set: - curr = current; - xenpmu_data = curr->arch.vpmu.xenpmu_data; - if ( xenpmu_data == NULL ) - return -EINVAL; - vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc); - break; - - case XENPMU_flush: - curr = current; - xenpmu_data = curr->arch.vpmu.xenpmu_data; - if ( xenpmu_data == NULL ) - return -EINVAL; - xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED; - vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc); - vpmu_load(curr); - break; - - default: - ret = -EINVAL; - } - - return ret; -} - -static int __init vpmu_init(void) -{ - int vendor = current_cpu_data.x86_vendor; - - if ( !opt_vpmu_enabled ) - { - printk(XENLOG_INFO "VPMU: disabled\n"); - return 0; - } - - /* NMI watchdog uses LVTPC and HW counter */ - if ( 
opt_watchdog && opt_vpmu_enabled ) - { - printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n"); - opt_vpmu_enabled = 0; - vpmu_mode = XENPMU_MODE_OFF; - return 0; - } - - switch ( vendor ) - { - case X86_VENDOR_AMD: - if ( amd_vpmu_init() ) - vpmu_mode = XENPMU_MODE_OFF; - break; - case X86_VENDOR_INTEL: - if ( core2_vpmu_init() ) - vpmu_mode = XENPMU_MODE_OFF; - break; - default: - printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. " - "Turning VPMU off.\n", vendor); - vpmu_mode = XENPMU_MODE_OFF; - break; - } - - if ( vpmu_mode != XENPMU_MODE_OFF ) - printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "." - __stringify(XENPMU_VER_MIN) "\n"); - else - opt_vpmu_enabled = 0; - - return 0; -} -__initcall(vpmu_init); diff --git a/xen/arch/x86/oprofile/op_model_ppro.c b/xen/arch/x86/oprofile/op_model_ppro.c index ca429a1..89649d0 100644 --- a/xen/arch/x86/oprofile/op_model_ppro.c +++ b/xen/arch/x86/oprofile/op_model_ppro.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "op_x86_model.h" #include "op_counter.h" diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 8a40deb..f861243 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -72,7 +72,7 @@ #include #include #include -#include +#include #include #include diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index 6fce6aa..9f2e904 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -19,8 +19,8 @@ #ifndef __ASM_X86_HVM_VMX_VMCS_H__ #define __ASM_X86_HVM_VMX_VMCS_H__ +#include #include -#include #include extern void vmcs_dump_vcpu(struct vcpu *v); diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h deleted file mode 100644 index 63851a7..0000000 --- a/xen/include/asm-x86/hvm/vpmu.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * vpmu.h: PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan - */ - -#ifndef __ASM_X86_HVM_VPMU_H_ -#define __ASM_X86_HVM_VPMU_H_ - -#include - -#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu) -#define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu) - -#define MSR_TYPE_COUNTER 0 -#define MSR_TYPE_CTRL 1 -#define MSR_TYPE_GLOBAL 2 -#define MSR_TYPE_ARCH_COUNTER 3 -#define MSR_TYPE_ARCH_CTRL 4 - -/* Start of PMU register bank */ -#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \ - (uintptr_t)ctxt->offset)) - -/* Arch specific operations shared by all vpmus */ -struct arch_vpmu_ops { - int (*do_wrmsr)(unsigned int msr, uint64_t msr_content, - uint64_t supported); - int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content); - int (*do_interrupt)(struct cpu_user_regs *regs); - void (*do_cpuid)(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); - void (*arch_vpmu_destroy)(struct vcpu *v); - int (*arch_vpmu_save)(struct vcpu *v); - void (*arch_vpmu_load)(struct vcpu *v); - void (*arch_vpmu_dump)(const struct vcpu *); -}; - -int core2_vpmu_init(void); -int vmx_vpmu_initialise(struct vcpu *); -int amd_vpmu_init(void); -int svm_vpmu_initialise(struct vcpu *); - -struct vpmu_struct { - u32 flags; - u32 last_pcpu; - u32 hw_lapic_lvtpc; - void *context; /* May be shared 
with PV guest */ - void *priv_context; /* hypervisor-only */ - struct arch_vpmu_ops *arch_vpmu_ops; - struct xen_pmu_data *xenpmu_data; - spinlock_t vpmu_lock; -}; - -/* VPMU states */ -#define VPMU_CONTEXT_ALLOCATED 0x1 -#define VPMU_CONTEXT_LOADED 0x2 -#define VPMU_RUNNING 0x4 -#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ -#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */ -#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 - -static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask) -{ - vpmu->flags |= mask; -} -static inline void vpmu_reset(struct vpmu_struct *vpmu, const u32 mask) -{ - vpmu->flags &= ~mask; -} -static inline void vpmu_clear(struct vpmu_struct *vpmu) -{ - vpmu->flags = 0; -} -static inline bool_t vpmu_is_set(const struct vpmu_struct *vpmu, const u32 mask) -{ - return !!(vpmu->flags & mask); -} -static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu, - const u32 mask) -{ - return !!((vpmu->flags & mask) == mask); -} - -void vpmu_lvtpc_update(uint32_t val); -int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, - uint64_t supported, bool_t is_write); -void vpmu_do_interrupt(struct cpu_user_regs *regs); -void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); -void vpmu_initialise(struct vcpu *v); -void vpmu_destroy(struct vcpu *v); -void vpmu_save(struct vcpu *v); -void vpmu_load(struct vcpu *v); -void vpmu_dump(struct vcpu *v); - -static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, - uint64_t supported) -{ - return vpmu_do_msr(msr, &msr_content, supported, 1); -} -static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - return vpmu_do_msr(msr, msr_content, 0, 0); -} - -extern int acquire_pmu_ownership(int pmu_ownership); -extern void release_pmu_ownership(int pmu_ownership); - -extern unsigned int vpmu_mode; -extern unsigned int vpmu_features; - -/* Context switch */ -static inline void 
vpmu_switch_from(struct vcpu *prev) -{ - if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) ) - vpmu_save(prev); -} - -static inline void vpmu_switch_to(struct vcpu *next) -{ - if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) ) - vpmu_load(next); -} - -#endif /* __ASM_X86_HVM_VPMU_H_*/ - diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h new file mode 100644 index 0000000..63851a7 --- /dev/null +++ b/xen/include/asm-x86/vpmu.h @@ -0,0 +1,143 @@ +/* + * vpmu.h: PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: Haitao Shan + */ + +#ifndef __ASM_X86_HVM_VPMU_H_ +#define __ASM_X86_HVM_VPMU_H_ + +#include + +#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu) +#define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu) + +#define MSR_TYPE_COUNTER 0 +#define MSR_TYPE_CTRL 1 +#define MSR_TYPE_GLOBAL 2 +#define MSR_TYPE_ARCH_COUNTER 3 +#define MSR_TYPE_ARCH_CTRL 4 + +/* Start of PMU register bank */ +#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \ + (uintptr_t)ctxt->offset)) + +/* Arch specific operations shared by all vpmus */ +struct arch_vpmu_ops { + int (*do_wrmsr)(unsigned int msr, uint64_t msr_content, + uint64_t supported); + int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content); + int (*do_interrupt)(struct cpu_user_regs *regs); + void (*do_cpuid)(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + void (*arch_vpmu_destroy)(struct vcpu *v); + int (*arch_vpmu_save)(struct vcpu *v); + void (*arch_vpmu_load)(struct vcpu *v); + void (*arch_vpmu_dump)(const struct vcpu *); +}; + +int core2_vpmu_init(void); +int vmx_vpmu_initialise(struct vcpu *); +int amd_vpmu_init(void); +int svm_vpmu_initialise(struct vcpu *); + +struct vpmu_struct { + u32 flags; + u32 last_pcpu; + u32 hw_lapic_lvtpc; + void *context; /* May be shared with PV guest */ + void *priv_context; /* hypervisor-only */ + struct arch_vpmu_ops *arch_vpmu_ops; + struct xen_pmu_data *xenpmu_data; + spinlock_t vpmu_lock; +}; + +/* VPMU states */ +#define VPMU_CONTEXT_ALLOCATED 0x1 +#define VPMU_CONTEXT_LOADED 0x2 +#define VPMU_RUNNING 0x4 +#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ +#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */ +#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 + +static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask) +{ + vpmu->flags |= mask; +} +static inline void vpmu_reset(struct vpmu_struct *vpmu, const u32 mask) +{ + vpmu->flags &= ~mask; +} +static inline void 
vpmu_clear(struct vpmu_struct *vpmu) +{ + vpmu->flags = 0; +} +static inline bool_t vpmu_is_set(const struct vpmu_struct *vpmu, const u32 mask) +{ + return !!(vpmu->flags & mask); +} +static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu, + const u32 mask) +{ + return !!((vpmu->flags & mask) == mask); +} + +void vpmu_lvtpc_update(uint32_t val); +int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, + uint64_t supported, bool_t is_write); +void vpmu_do_interrupt(struct cpu_user_regs *regs); +void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); +void vpmu_initialise(struct vcpu *v); +void vpmu_destroy(struct vcpu *v); +void vpmu_save(struct vcpu *v); +void vpmu_load(struct vcpu *v); +void vpmu_dump(struct vcpu *v); + +static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, + uint64_t supported) +{ + return vpmu_do_msr(msr, &msr_content, supported, 1); +} +static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + return vpmu_do_msr(msr, msr_content, 0, 0); +} + +extern int acquire_pmu_ownership(int pmu_ownership); +extern void release_pmu_ownership(int pmu_ownership); + +extern unsigned int vpmu_mode; +extern unsigned int vpmu_features; + +/* Context switch */ +static inline void vpmu_switch_from(struct vcpu *prev) +{ + if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) ) + vpmu_save(prev); +} + +static inline void vpmu_switch_to(struct vcpu *next) +{ + if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) ) + vpmu_load(next); +} + +#endif /* __ASM_X86_HVM_VPMU_H_*/ + -- 1.8.1.4