From mboxrd@z Thu Jan  1 00:00:00 1970
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v7 14/19] x86/VPMU: Handle PMU interrupts for PV
	guests
Date: Fri,  6 Jun 2014 13:40:10 -0400
Message-ID: <1402076415-26475-15-git-send-email-boris.ostrovsky@oracle.com>
References: <1402076415-26475-1-git-send-email-boris.ostrovsky@oracle.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Return-path: <xen-devel-bounces@lists.xen.org>
In-Reply-To: <1402076415-26475-1-git-send-email-boris.ostrovsky@oracle.com>
List-Unsubscribe: <http://lists.xen.org/cgi-bin/mailman/options/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xen.org>
List-Help: <mailto:xen-devel-request@lists.xen.org?subject=help>
List-Subscribe: <http://lists.xen.org/cgi-bin/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=subscribe>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: JBeulich@suse.com, kevin.tian@intel.com, dietmar.hahn@ts.fujitsu.com, suravee.suthikulpanit@amd.com
Cc: tim@xen.org, boris.ostrovsky@oracle.com, keir@xen.org, jun.nakajima@intel.com, xen-devel@lists.xen.org
List-Id: xen-devel@lists.xenproject.org

Add support for handling PMU interrupts for PV guests.

VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
hypercall. This allows the guest to access PMU MSR values that are stored in
VPMU context which is shared between hypervisor and domain, thus avoiding
traps to hypervisor.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
 xen/arch/x86/hvm/vpmu.c  | 134 +++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/public/pmu.h |   7 +++
 2 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index ca0534b..ce842e4 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -76,7 +76,12 @@ void vpmu_lvtpc_update(uint32_t val)
     struct vpmu_struct *vpmu = vcpu_vpmu(current);
 
     vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
-    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+
+    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
+    if ( is_hvm_domain(current->domain) ||
+         !(current->arch.vpmu.xenpmu_data &&
+           (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED)) )
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
 }
 
 int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
@@ -87,7 +92,23 @@ int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
         return 0;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
-        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content);
+    {
+        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content);
+
+        /*
+         * We may have received a PMU interrupt during WRMSR handling
+         * and since do_wrmsr may load VPMU context we should save
+         * (and unload) it again.
+         */
+        if ( !is_hvm_domain(current->domain) &&
+             (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) )
+        {
+            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+            vpmu->arch_vpmu_ops->arch_vpmu_save(current);
+            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        }
+        return ret;
+    }
     return 0;
 }
 
@@ -99,14 +120,109 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
         return 0;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
-        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+    {
+        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
+
+        if ( !is_hvm_domain(current->domain) &&
+            (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) )
+        {
+            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+            vpmu->arch_vpmu_ops->arch_vpmu_save(current);
+            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        }
+        return ret;
+    }
     return 0;
 }
 
+static struct vcpu *choose_dom0_vcpu(void)
+{
+    struct vcpu *v;
+    unsigned idx = smp_processor_id() % hardware_domain->max_vcpus;
+
+    v = hardware_domain->vcpu[idx];
+
+    /*
+     * If index is not populated search downwards the vcpu array until
+     * a valid vcpu can be found
+     */
+    while ( !v && idx-- )
+        v = hardware_domain->vcpu[idx];
+
+    return v;
+}
+
 int vpmu_do_interrupt(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct vpmu_struct *vpmu;
+
+    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
+    if ( v->domain->domain_id >= DOMID_FIRST_RESERVED )
+    {
+        v = choose_dom0_vcpu();
+        if ( !v )
+            return 0;
+    }
+
+    vpmu = vcpu_vpmu(v);
+    if ( !is_hvm_domain(v->domain) )
+    {
+        /* PV(H) guest or dom0 is doing system profiling */
+        const struct cpu_user_regs *gregs;
+        int err;
+
+        if ( v->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED )
+            return 1;
+
+        if ( is_pvh_domain(current->domain) &&
+             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+            return 0;
+
+        /* PV guest will be reading PMU MSRs from xenpmu_data */
+        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        err = vpmu->arch_vpmu_ops->arch_vpmu_save(v);
+        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+
+        /* Store appropriate registers in xenpmu_data */
+        if ( is_pv_32bit_domain(current->domain) )
+        {
+            /*
+             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+             * and therefore we treat it the same way as a non-priviledged
+             * PV 32-bit domain.
+             */
+            struct compat_cpu_user_regs *cmp;
+
+            gregs = guest_cpu_user_regs();
+
+            cmp = (void *)&v->arch.vpmu.xenpmu_data->pmu.r.regs;
+            XLAT_cpu_user_regs(cmp, gregs);
+        }
+        else if ( !is_hardware_domain(current->domain) &&
+                 !is_idle_vcpu(current) )
+        {
+            /* PV(H) guest */
+            gregs = guest_cpu_user_regs();
+            memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs,
+                   gregs, sizeof(struct cpu_user_regs));
+        }
+        else
+            memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs,
+                   regs, sizeof(struct cpu_user_regs));
+
+        v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id;
+        v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id;
+        v->arch.vpmu.xenpmu_data->pcpu_id = smp_processor_id();
+
+        v->arch.vpmu.xenpmu_data->pmu_flags |= PMU_CACHED;
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
+        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
+
+        send_guest_vcpu_virq(v, VIRQ_XENPMU);
+
+        return 1;
+    }
 
     if ( vpmu->arch_vpmu_ops )
     {
@@ -225,7 +341,9 @@ void vpmu_load(struct vcpu *v)
     local_irq_enable();
 
     /* Only when PMU is counting, we load PMU context immediately. */
-    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+         (!is_hvm_domain(v->domain) &&
+          vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
         return;
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
@@ -463,6 +581,12 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
         vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
         ret = 0;
         break;
+    case XENPMU_flush:
+        current->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED;
+        vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
+        vpmu_load(current);
+        ret = 0;
+        break;
     }
 
     return ret;
diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
index 7341de9..67ce5de 100644
--- a/xen/include/public/pmu.h
+++ b/xen/include/public/pmu.h
@@ -28,6 +28,7 @@
 #define XENPMU_init            4
 #define XENPMU_finish          5
 #define XENPMU_lvtpc_set       6
+#define XENPMU_flush           7 /* Write cached MSR values to HW     */
 /* ` } */
 
 /* Parameters structure for HYPERVISOR_xenpmu_op call */
@@ -65,6 +66,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
  */
 #define XENPMU_FEATURE_INTEL_BTS  1
 
+/*
+ * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
+ * the hypervisor
+ */
+#define PMU_CACHED 1
+
 /* Shared between hypervisor and PV domain */
 struct xen_pmu_data {
     uint32_t domain_id;
-- 
1.8.1.4